diff --git a/.gitbook.yaml b/.gitbook.yaml index 703fd8f6e..f763fd123 100644 --- a/.gitbook.yaml +++ b/.gitbook.yaml @@ -57,6 +57,7 @@ redirects: output/cloudwatch: ./pipeline/outputs/cloudwatch.md output/datadog: ./pipeline/outputs/datadog.md output/es: ./pipeline/outputs/elasticsearch.md + output/fabric: ./pipeline/outputs/azure_kusto.md output/file: ./pipeline/outputs/file.md output/firehose: ./pipeline/outputs/firehose.md output/flowcounter: ./pipeline/outputs/flowcounter.md @@ -76,3 +77,6 @@ redirects: output/stdout: ./pipeline/outputs/standard-output.md output/tcp: ./pipeline/outputs/tcp-and-tls.md output/td: ./pipeline/outputs/treasure-data.md + + # Processors + processor/metrics_selector: ./pipeline/processors/metrics_selector.md diff --git a/.gitbook/assets/3.0.png b/.gitbook/assets/3.0.png new file mode 100644 index 000000000..3b5868101 Binary files /dev/null and b/.gitbook/assets/3.0.png differ diff --git a/.gitbook/assets/3.1.png b/.gitbook/assets/3.1.png new file mode 100644 index 000000000..7ec36d253 Binary files /dev/null and b/.gitbook/assets/3.1.png differ diff --git a/.gitbook/assets/3.2.png b/.gitbook/assets/3.2.png new file mode 100644 index 000000000..86258a229 Binary files /dev/null and b/.gitbook/assets/3.2.png differ diff --git a/.gitbook/assets/image (1).png b/.gitbook/assets/image (1).png index 2a6609e1c..50446b701 100644 Binary files a/.gitbook/assets/image (1).png and b/.gitbook/assets/image (1).png differ diff --git a/.gitbook/assets/image.png b/.gitbook/assets/image.png index e1ab889fd..e3493a87f 100644 Binary files a/.gitbook/assets/image.png and b/.gitbook/assets/image.png differ diff --git a/.gitbook/includes/untitled.md b/.gitbook/includes/untitled.md new file mode 100644 index 000000000..8441b798d --- /dev/null +++ b/.gitbook/includes/untitled.md @@ -0,0 +1,7 @@ +--- +title: Untitled +--- + +{% embed url="https://o11y-workshops.gitlab.io/workshop-fluentbit/lab01.html" fullWidth="false" %} +Lab 1 - Introduction to Fluent Bit +{% endembed %} diff --git a/.gitignore b/.gitignore index d7c216868..3c344ca72 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ _book/* node_modules/* .gitignore old/* - +.DS_Store diff --git a/.vale.ini b/.vale.ini new file mode 100755 index 000000000..c77fea578 --- /dev/null +++ b/.vale.ini @@ -0,0 +1,17 @@ +StylesPath = vale-styles +MinAlertLevel = suggestion # suggestion, warning or error + +[formats] +mdx = md + +[*.{md,markdown,txt,htm,html}] +BasedOnStyles = FluentBit +IgnoredScopes = tt, code + +FluentBit.Terms = YES +FluentBit.Titles = YES +FluentBit.FutureTense = NO +FluentBit.Headings = NO +FluentBit.Passive = NO +FluentBit.Subjunctive = NO +FluentBit.Colons = NO diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 000000000..a93d99381 --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1 @@ +* @fluent/fluent-bit-maintainers @fluent/chronosphere-tech-writers diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index be6c34ca5..6594d15e1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,76 +1,126 @@ -# Fluent Bit Documentation - -First of all, thanks for taking the time to read this document. It means you are interested in contributing, and we highly appreciate the time you are investing. - -## Introduction - -[Fluent Bit Documentation](https://docs.fluentbit.io) source code lives in a separate repository called [fluent/fluent-bit-docs](https://github.com/fluent/fluent-bit-docs) on Github. 
The reason of this separate repository is to avoid extra commits on Fluent Bit source code project history that leads to more complexity when maintaining the core project: yes, we read the commit history every single day, and usually, we maintain separate branches and this separation simplify the process for us. +# Contributing to Fluent Bit docs + +First of all, thanks for taking the time to read this guide. The fact that you're +here means you're interested in contributing to Fluent Bit, and we highly appreciate +your time. + +This repository contains the files for the +[Fluent Bit documentation library](https://docs.fluentbit.io/). Keeping these docs +separate from the [main Fluent Bit repository](https://github.com/fluent/fluent-bit) +helps reduce the number of commits to the Fluent Bit source code and makes it +easier to maintain both projects. + +Fluent Bit has a group of dedicated maintainers who oversee this repository, +including several technical writers. These writers will review any pull requests +you open, so don't be afraid to contribute—even if you're not a writer by trade. +Your suggestions are valuable, and we'll help you wrangle any stray commas. + +## GitBook + +The Fluent Bit docs library is built and hosted through +[GitBook](https://docs.gitbook.com/). Unfortunately, GitBook doesn't support +local previews for contributors, but a Fluent Bit maintainer with a dedicated GitBook +account can verify that things are formatted correctly after you open a new pull +request. + +Each `.md` file in this repository is a single page. You can use +[standard Markdown syntax](https://docs.gitbook.com/content-editor/editing-content/markdown) +to edit existing pages, or create a new `.md` file to add an additional page to +the docs library. If you create a new page, you'll also need to update +[GitBook's `SUMMARY.md` file](https://docs.gitbook.com/integrations/git-sync/content-configuration#structure) +(or ask a maintainer to update it for you). ## Workflow -All documentation contributions arrives as Pull Requests (PR) on Github in the repository [fluent/fluent-bit-docs](https://github.com/fluent/fluent-bit-docs). Then some of the maintainers of Fluent Bit will review it, triage it, add comments if needed, or merge it. +After you open a pull request in this repository, a Fluent Bit maintainer will review +it, triage it, add comments or suggestions as needed, and then merge it. After +your changes are successfully merged into `master`, the docs site will update +within a few minutes. -Once a PR is merged, a third-party service called [Gitbook](https://gitbook.com) will receive a notification and will grab the latest changes, render a new site and update the content of [docs.fluentbit.io](https://docs.fluentbit.io). +### Stale pull requests -## Source Code Structure +If you open a pull request that requires ongoing discussion or review, the +Fluent Bit maintainers will add a [`waiting-for-user` tag](#tags) to your pull +request. This tag means that we're blocked from moving forward until you reply. +To keep contributions from going stale, we'll wait 45 days for your response, +but we may close the pull request if we don't hear back from you by then. -Documentation source code structure depends on Fluent Bit source code structure and its versions. 
In Fluent Bit source code, we have a stable branch and a development branch; as of now, these are: +## Submit a contribution -- stable branch: [1.8](https://github.com/fluent/fluent-bit/tree/1.8) -- development branch: [master](https://github.com/fluent/fluent-bit/tree/master) +When you open a pull request, make your changes against `master`, which is the +active development branch. If your contribution also applies to the latest +stable version, submit another PR for that versioned branch. However, if +submitting multiple PRs at the same time adds too much complexity, you can instead +create a single PR against `master` and specify that your changes need to be +**backported** to other branches; one of our maintainers will take care of that +process on your behalf. -For Documentation, we follow the same pattern; we have branches for the stable and development versions. +All contributions must be made **first** against [master branch](https://github.com/fluent/fluent-bit-docs/tree/master) which is the active development branch, and then **if** the contribution also applies for the current stable branch, submit another PR for that specific branch, if submitting another PR adds some complexity, please specify in the first PR as a comment (for master branch) that it needs to be *backported*. One of our maintainers will take care of that process. -## Submitting Contributions +As a contributor, we'll ask you to follow a few best practices related to Git: -All contributions must be made **first** against [master branch](https://github.com/fluent/fluent-bit-docs/tree/master) which is the active development branch, and then **if** the contribution also applies for the current stable branch, submit another PR for that specific branch, if submitting another PR adds some complexity, please specify in the first PR as a comment (for master branch) that it needs to be *backported*. One of our maintainers will take care of that process. +### One file per commit -### GIT email check +Each commit you make should only modify one file or interface—we follow the same +practice in the Fluent Bit source code. -Most of the time GIT is not fully configured in your environment, and when cloning the repository and committing changes, the user email might not be set. Make sure your email is properly configured. You can check your current setting with: +### Commit subjects -```bash -cd fluent-bit-docs/ -git config user.email -``` - -If you need to adjust your email, do this: - -``` -git config user.email something@myemailprovider.com -``` +Use descriptive commit subjects that describe which file or interface you're +modifying. -### Commit Subjects +For example, if you're modifying the Syslog output plugin doc, whose file is +located at [pipeline/outputs/syslog.md](https://github.com/fluent/fluent-bit-docs/blob/master/pipeline/outputs/syslog.md), this would be a descriptive commit subject: -The subject must be representative enough to describe which `file` or `interface` is modifying when committing your changes. An everyday use case or example is: +`pipeline: outputs: syslog: fix grammar in examples` -- User is enhancing the documentation for the Syslog output plugin +Since this commit is prefixed with the relevant file path, it helps our maintainers +understand and prioritize your contribution. 
-Considering that Syslog output plugin documentation resides in this address: +### Set your email in Git -- [pipeline/outputs/syslog.md](https://github.com/fluent/fluent-bit-docs/blob/master/pipeline/outputs/syslog.md) +Make sure your email address is configured in your local Git environment. This +should be the same email address associated with your GitHub account. -the suggested commit will be: +For more information, refer to GitHub's guide to +[setting your commit email address in Git](https://docs.github.com/en/account-and-profile/setting-up-and-managing-your-personal-account-on-github/managing-email-preferences/setting-your-commit-email-address#setting-your-commit-email-address-in-git). -``` -pipeline: outputs: syslog: fix grammar in examples -``` +### Sign off your commits -As you can see, the commit is prefixed with the paths of the file being modified. For maintainers, this helps to understand and prioritize the review of the contributions. +You must sign off your commits to certify your identity as the commit author. If +you don't sign off your commits, our CI system will flag the pull request with a +[DCO](https://github.com/src-d/guide/blob/master/developer-community/fix-DCO.md) +error and prevent your pull request from merging. -Usually, a PR can have multiple commits, but we enforce that every commit only touches one file or interface (we apply the same practice in Fluent Bit source code). +To prevent DCO errors, refer to the following guide about +[signing your commits properly](https://github.com/src-d/guide/blob/master/developer-community/fix-DCO.md). -### Sign off your commits +> :bulb: For faster signing, you can use the `-s` flag in Git: +> +> `git commit -a -s -m "pipeline: outputs: syslog: fix grammar in examples"` +> +> If you're using VSCode, you can also enable the +> [**Git: Always Sign Off**](https://github.com/microsoft/vscode/issues/83096#issuecomment-545350047) +> setting, which automatically appends a `Signed-off-by:` message to your commits. -Your commits must be **sign off**; this certifies who is the author of the commit. It might sound a bit redundant, but it is needed. If you don't sign-off your commits, our CI system will flag the PR with a [DCO](https://github.com/src-d/guide/blob/master/developer-community/fix-DCO.md) error and the PR will be blocked. +## Style guidelines -The following link explains how to fix DCO error by signing your commits properly: +The Fluent Bit maintainers refer to the +[Google developer documentation style guide](https://developers.google.com/style) +for most topics related to grammar, style, and formatting. We don't expect you +to memorize these style rules, but the technical writer who reviews your pull +request may suggest changes accordingly. -- https://github.com/src-d/guide/blob/master/developer-community/fix-DCO.md +### URLs -For short: always use `-s` when committing your changes, e.g.: +When cross-linking to a page in this repository, use a full relative path whenever +possible. For example: +```text +[LTSV](pipeline/parsers/ltsv.md) and [Logfmt](pipeline/parsers/logfmt.md) ``` -git commit -a -s -m "pipeline: outputs: syslog: fix grammar in examples" -``` +### Vale + +The Fluent Bit maintainers use the [Vale](https://vale.sh/docs/) plugin, which lints +pull requests and adds suggestions to improve style and clarity. 
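+
+If you'd like a rough local preview of these checks, one option (assuming you've
+installed the [Vale CLI](https://vale.sh/docs/) and cloned this repository, so the
+`.vale.ini` configuration and `vale-styles` rules are picked up) is to run Vale
+against the page you're editing from the repository root:
+
+```bash
+# Lint a single page with the repository's Vale configuration
+vale pipeline/outputs/syslog.md
+```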
diff --git a/MAINTAINERS.md b/MAINTAINERS.md index 31153b111..9abcf140a 100644 --- a/MAINTAINERS.md +++ b/MAINTAINERS.md @@ -2,7 +2,10 @@ Fluent Bit is developed and supported by many individuals and companies. The following table list the names of maintainers per components and the company supporting their work. -| Maintainer Name | Documentation portion | Company | -| :---------------------------------------------------- | ------------------------ | ------------------------------------------------- | -| [Eduardo Silva](https://github.com/edsiper) | All | [Arm Treasure Data](https://www.treasuredata.com) | -| [Anurag Gupta](https://github.com/agup006) | All | [Calyptia](https://Calyptia.com) | +| Maintainer Name | Documentation portion | Company | +| :---------------------------------------------------- | ------------------------ | ------------------------------------------------- | +| [Eduardo Silva](https://github.com/edsiper) | All | [Chronosphere](https://chronosphere.io) | +| [Anurag Gupta](https://github.com/agup006) | All | [Chronosphere](https://chronosphere.io) | +| [Jose Lecaros](https://github.com/lecaros) | All | [Chronosphere](https://chronosphere.io) | +| [Lynette Miles](https://github.com/esmerel) | All | [Chronosphere](https://chronosphere.io) | +| [Alexa Kreizinger](https://github.com/alexakreizinger) | All | [Chronosphere](https://chronosphere.io) | diff --git a/README.md b/README.md index 3656b69aa..3792b40de 100644 --- a/README.md +++ b/README.md @@ -2,39 +2,55 @@ description: High Performance Telemetry Agent for Logs, Metrics and Traces --- -# Fluent Bit v2.2 Documentation +# Fluent Bit v4.0 Documentation -<figure><img src=".gitbook/assets/v2.2 big@2x.png" alt=""><figcaption></figcaption></figure> +<figure><img src=".gitbook/assets/3.2.png" alt=""><figcaption></figcaption></figure> -[Fluent Bit](http://fluentbit.io) is a Fast and Lightweight **Telemetry Agent** for Logs, Metrics, and Traces for Linux, macOS, Windows, and BSD family operating systems. It has been made with a strong focus on performance to allow the collection and processing of telemetry data from different sources without complexity. +[Fluent Bit](http://fluentbit.io) is a fast and lightweight telemetry agent +for logs, metrics, and traces for Linux, macOS, Windows, and BSD family +operating systems. Fluent Bit has been made with a strong focus on performance to allow +the collection and processing of telemetry data from different sources without +complexity. 
## Features -* High Performance: High throughput with low resources consumption -* Data Parsing - * Convert your unstructured messages using our parsers: [JSON](pipeline/parsers/json.md), [Regex](pipeline/parsers/regular-expression.md), [LTSV](pipeline/parsers/ltsv.md) and [Logfmt](pipeline/parsers/logfmt.md) -* Metrics Support: Prometheus and OpenTelemetry compatible -* Reliability and Data Integrity - * [Backpressure](administration/backpressure.md) Handling - * [Data Buffering](administration/buffering-and-storage.md) in memory and file system -* Networking - * Security: built-in TLS/SSL support - * Asynchronous I/O -* Pluggable Architecture and [Extensibility](development/library_api.md): Inputs, Filters and Outputs - * More than 100 built-in plugins are available - * Extensibility - * Write any input, filter or output plugin in C language - * WASM: [WASM Filter Plugins](development/wasm-filter-plugins.md) or [WASM Input Plugins](development/wasm-input-plugins.md) - * Bonus: write [Filters in Lua](pipeline/filters/lua.md) or [Output plugins in Golang](development/golang-output-plugins.md) -* [Monitoring](administration/monitoring.md): expose internal metrics over HTTP in JSON and [Prometheus](https://prometheus.io/) format -* [Stream Processing](stream-processing/introduction.md): Perform data selection and transformation using simple SQL queries - * Create new streams of data using query results - * Aggregation Windows - * Data analysis and prediction: Timeseries forecasting -* Portable: runs on Linux, macOS, Windows and BSD systems +- High performance: High throughput with low resources consumption +- Data parsing + - Convert your unstructured messages using our parsers: + [JSON](pipeline/parsers/json.md), + [Regex](pipeline/parsers/regular-expression.md), + [LTSV](pipeline/parsers/ltsv.md) and [Logfmt](pipeline/parsers/logfmt.md) +- Metrics support: Prometheus and OpenTelemetry compatible +- Reliability and data integrity + - [Backpressure](administration/backpressure.md) handling + - [Data buffering](administration/buffering-and-storage.md) in memory and file system +- Networking + - Security: Built-in TLS/SSL support + - Asynchronous I/O +- Pluggable architecture and [extensibility](development/library_api.md): Inputs, + Filters and Outputs: + - Connect nearly any source to nearly any destination using preexisting plugins + - Extensibility: + - Write input, filter, or output plugins in the C language + - WASM: [WASM Filter Plugins](development/wasm-filter-plugins.md) or + [WASM Input Plugins](development/wasm-input-plugins.md) + - Write [Filters in Lua](pipeline/filters/lua.md) or + [Output plugins in Golang](development/golang-output-plugins.md) +- [Monitoring](administration/monitoring.md): Expose internal metrics over HTTP + in JSON and [Prometheus](https://prometheus.io/) format +- [Stream Processing](stream-processing/introduction.md): Perform data selection + and transformation using simple SQL queries + - Create new streams of data using query results + - Aggregation windows + - Data analysis and prediction: Timeseries forecasting +- Portable: Runs on Linux, macOS, Windows and BSD systems ## Fluent Bit, Fluentd and CNCF -[Fluent Bit](http://fluentbit.io) is a [CNCF](https://cncf.io) **graduated** sub-project under the umbrella of [Fluentd](http://fluentd.org). Fluent Bit is licensed under the terms of the [Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0). 
+[Fluent Bit](http://fluentbit.io) is a [CNCF](https://cncf.io) graduated sub-project +under the umbrella of [Fluentd](http://fluentd.org). Fluent Bit is licensed under +the terms of the [Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0). -Fluent Bit was originally created by [Eduardo Silva](https://www.linkedin.com/in/edsiper/). As a CNCF-hosted project, it is a fully **vendor-neutral** and community-driven project. +Fluent Bit was originally created by [Eduardo Silva](https://www.linkedin.com/in/edsiper/) +and is now sponsored by [Chronosphere](https://chronosphere.io/). As a +CNCF-hosted project, it is a fully vendor-neutral and community-driven project. diff --git a/SUMMARY.md b/SUMMARY.md index d8151098f..ea3ab6cea 100644 --- a/SUMMARY.md +++ b/SUMMARY.md @@ -1,6 +1,6 @@ # Table of contents -* [Fluent Bit v2.2 Documentation](README.md) +* [Fluent Bit v4.0 Documentation](README.md) ## About @@ -8,6 +8,7 @@ * [A Brief History of Fluent Bit](about/history.md) * [Fluentd & Fluent Bit](about/fluentd-and-fluent-bit.md) * [License](about/license.md) +* [Sandbox and Lab Resources](about/sandbox-and-lab-resources.md) ## Concepts @@ -44,10 +45,20 @@ * [macOS](installation/macos.md) * [Windows](installation/windows.md) * [Yocto / Embedded Linux](installation/yocto-embedded-linux.md) +* [Buildroot / Embedded Linux](installation/buildroot-embedded-linux.md) ## Administration * [Configuring Fluent Bit](administration/configuring-fluent-bit/README.md) + * [YAML Configuration](administration/configuring-fluent-bit/yaml/README.md) + * [Service](administration/configuring-fluent-bit/yaml/service-section.md) + * [Parsers](administration/configuring-fluent-bit/yaml/parsers-section.md) + * [Multiline Parsers](administration/configuring-fluent-bit/yaml/multiline-parsers-section.md) + * [Pipeline](administration/configuring-fluent-bit/yaml/pipeline-section.md) + * [Plugins](administration/configuring-fluent-bit/yaml/plugins-section.md) + * [Upstream Servers](administration/configuring-fluent-bit/yaml/upstream-servers-section.md) + * [Environment Variables](administration/configuring-fluent-bit/yaml/environment-variables-section.md) + * [Includes](administration/configuring-fluent-bit/yaml/includes-section.md) * [Classic mode](administration/configuring-fluent-bit/classic-mode/README.md) * [Format and Schema](administration/configuring-fluent-bit/classic-mode/format-schema.md) * [Configuration File](administration/configuring-fluent-bit/classic-mode/configuration-file.md) @@ -55,8 +66,6 @@ * [Commands](administration/configuring-fluent-bit/classic-mode/commands.md) * [Upstream Servers](administration/configuring-fluent-bit/classic-mode/upstream-servers.md) * [Record Accessor](administration/configuring-fluent-bit/classic-mode/record-accessor.md) - * [YAML Configuration](administration/configuring-fluent-bit/yaml/README.md) - * [Configuration File](administration/configuring-fluent-bit/yaml/configuration-file.md) * [Unit Sizes](administration/configuring-fluent-bit/unit-sizes.md) * [Multiline Parsing](administration/configuring-fluent-bit/multiline-parsing.md) * [Transport Security](administration/transport-security.md) @@ -66,9 +75,12 @@ * [Networking](administration/networking.md) * [Memory Management](administration/memory-management.md) * [Monitoring](administration/monitoring.md) +* [Multithreading](administration/multithreading.md) * [HTTP Proxy](administration/http-proxy.md) * [Hot Reload](administration/hot-reload.md) * [Troubleshooting](administration/troubleshooting.md) +* 
[Performance Tips](administration/performance.md) +* [AWS credentials](administration/aws-credentials.md) ## Local Testing @@ -82,17 +94,18 @@ * [Collectd](pipeline/inputs/collectd.md) * [CPU Log Based Metrics](pipeline/inputs/cpu-metrics.md) * [Disk I/O Log Based Metrics](pipeline/inputs/disk-io-metrics.md) - * [Docker Log Based Metrics](pipeline/inputs/docker-metrics.md) * [Docker Events](pipeline/inputs/docker-events.md) + * [Docker Log Based Metrics](pipeline/inputs/docker-metrics.md) * [Dummy](pipeline/inputs/dummy.md) * [Elasticsearch](pipeline/inputs/elasticsearch.md) * [Exec](pipeline/inputs/exec.md) * [Exec Wasi](pipeline/inputs/exec-wasi.md) + * [Ebpf](pipeline/inputs/ebpf.md) * [Fluent Bit Metrics](pipeline/inputs/fluentbit-metrics.md) * [Forward](pipeline/inputs/forward.md) * [Head](pipeline/inputs/head.md) - * [HTTP](pipeline/inputs/http.md) * [Health](pipeline/inputs/health.md) + * [HTTP](pipeline/inputs/http.md) * [Kafka](pipeline/inputs/kafka.md) * [Kernel Logs](pipeline/inputs/kernel-logs.md) * [Kubernetes Events](pipeline/inputs/kubernetes-events.md) @@ -101,9 +114,11 @@ * [Network I/O Log Based Metrics](pipeline/inputs/network-io-metrics.md) * [NGINX Exporter Metrics](pipeline/inputs/nginx.md) * [Node Exporter Metrics](pipeline/inputs/node-exporter-metrics.md) + * [OpenTelemetry](pipeline/inputs/opentelemetry.md) * [Podman Metrics](pipeline/inputs/podman-metrics.md) - * [Process Log Based Metrics](pipeline/inputs/process.md) * [Process Exporter Metrics](pipeline/inputs/process-exporter-metrics.md) + * [Process Log Based Metrics](pipeline/inputs/process.md) + * [Prometheus Remote Write](pipeline/inputs/prometheus-remote-write.md) * [Prometheus Scrape Metrics](pipeline/inputs/prometheus-scrape-metrics.md) * [Random](pipeline/inputs/random.md) * [Serial Interface](pipeline/inputs/serial-interface.md) @@ -116,7 +131,6 @@ * [TCP](pipeline/inputs/tcp.md) * [Thermal](pipeline/inputs/thermal.md) * [UDP](pipeline/inputs/udp.md) - * [OpenTelemetry](pipeline/inputs/opentelemetry.md) * [Windows Event Log](pipeline/inputs/windows-event-log.md) * [Windows Event Log (winevtlog)](pipeline/inputs/windows-event-log-winevtlog.md) * [Windows Exporter Metrics](pipeline/inputs/windows-exporter-metrics.md) @@ -127,6 +141,13 @@ * [LTSV](pipeline/parsers/ltsv.md) * [Logfmt](pipeline/parsers/logfmt.md) * [Decoders](pipeline/parsers/decoders.md) +* [Processors](pipeline/processors/README.md) + * [Content Modifier](pipeline/processors/content-modifier.md) + * [Labels](pipeline/processors/labels.md) + * [Metrics Selector](pipeline/processors/metrics-selector.md) + * [OpenTelemetry Envelope](pipeline/processors/opentelemetry-envelope.md) + * [SQL](pipeline/processors/sql.md) + * [Conditional processing](pipeline/processors/conditional-processing.md) * [Filters](pipeline/filters/README.md) * [AWS Metadata](pipeline/filters/aws-metadata.md) * [CheckList](pipeline/filters/checklist.md) @@ -155,12 +176,14 @@ * [Amazon Kinesis Data Firehose](pipeline/outputs/firehose.md) * [Amazon Kinesis Data Streams](pipeline/outputs/kinesis.md) * [Amazon S3](pipeline/outputs/s3.md) - * [Azure Blob](pipeline/outputs/azure\_blob.md) - * [Azure Data Explorer](pipeline/outputs/azure\_kusto.md) + * [Azure Blob](pipeline/outputs/azure_blob.md) + * [Azure Data Explorer](pipeline/outputs/azure_kusto.md) * [Azure Log Analytics](pipeline/outputs/azure.md) * [Azure Logs Ingestion API](pipeline/outputs/azure_logs_ingestion.md) * [Counter](pipeline/outputs/counter.md) + * [Dash0](pipeline/outputs/dash0.md) * 
[Datadog](pipeline/outputs/datadog.md) + * [Dynatrace](pipeline/outputs/dynatrace.md) * [Elasticsearch](pipeline/outputs/elasticsearch.md) * [File](pipeline/outputs/file.md) * [FlowCounter](pipeline/outputs/flowcounter.md) @@ -174,13 +197,15 @@ * [Kafka REST Proxy](pipeline/outputs/kafka-rest-proxy.md) * [LogDNA](pipeline/outputs/logdna.md) * [Loki](pipeline/outputs/loki.md) + * [Microsoft Fabric](pipeline/outputs/azure_kusto.md) * [NATS](pipeline/outputs/nats.md) * [New Relic](pipeline/outputs/new-relic.md) * [NULL](pipeline/outputs/null.md) * [Observe](pipeline/outputs/observe.md) - * [Oracle Log Analytics](pipeline/outputs/oci-logging-analytics.md) + * [OpenObserve](pipeline/outputs/openobserve.md) * [OpenSearch](pipeline/outputs/opensearch.md) * [OpenTelemetry](pipeline/outputs/opentelemetry.md) + * [Oracle Log Analytics](pipeline/outputs/oci-logging-analytics.md) * [PostgreSQL](pipeline/outputs/postgresql.md) * [Prometheus Exporter](pipeline/outputs/prometheus-exporter.md) * [Prometheus Remote Write](pipeline/outputs/prometheus-remote-write.md) @@ -207,7 +232,7 @@ ## Fluent Bit for Developers <a href="#development" id="development"></a> -* [C Library API](development/library\_api.md) +* [C Library API](development/library_api.md) * [Ingest Records Manually](development/ingest-records-manually.md) * [Golang Output Plugins](development/golang-output-plugins.md) * [WASM Filter Plugins](development/wasm-filter-plugins.md) diff --git a/about/fluentd-and-fluent-bit.md b/about/fluentd-and-fluent-bit.md index f73a9c84d..9dd73b1ea 100644 --- a/about/fluentd-and-fluent-bit.md +++ b/about/fluentd-and-fluent-bit.md @@ -1,33 +1,45 @@ --- -description: The Production Grade Telemetry Ecosystem +description: The production grade telemetry ecosystem --- -# Fluentd & Fluent Bit +# Fluentd and Fluent Bit -Telemetry data processing in general can be complex, and at scale a bit more, that's why [Fluentd](https://www.fluentd.org) was born. Fluentd has become more than a simple tool, it has grown into a fullscale ecosystem that contains SDKs for different languages and sub-projects like [Fluent Bit](https://fluentbit.io). +Telemetry data processing can be complex, especially at scale. That's why +[Fluentd](https://www.fluentd.org) was created. Fluentd is more than a simple tool, +it's grown into a fullscale ecosystem that contains SDKs for different languages +and subprojects like [Fluent Bit](https://fluentbit.io). -On this page, we will describe the relationship between the [Fluentd](http://fluentd.org) and [Fluent Bit](http://fluentbit.io) open source projects, as a summary we can say both are: +Here, we describe the relationship between the [Fluentd](http://fluentd.org) +and [Fluent Bit](http://fluentbit.io) open source projects. -* Licensed under the terms of Apache License v2.0 -* **Graduated** Hosted projects by the [Cloud Native Computing Foundation (CNCF)](https://cncf.io) -* Production Grade solutions: deployed **million** of times every single day. -* **Vendor neutral** and community driven projects -* Widely Adopted by the Industry: trusted by all major companies like AWS, Microsoft, Google Cloud and hundreds of others. +Both projects are: -Both projects share a lot of similarities, [Fluent Bit](https://fluentbit.io) is fully designed and built on top of the best ideas of [Fluentd](https://www.fluentd.org) architecture and general design. Choosing which one to use depends on the end-user needs. +- Licensed under the terms of Apache License v2.0. 
+- Graduated hosted projects by the [Cloud Native Computing Foundation (CNCF)](https://cncf.io).
+- Production-grade solutions: Deployed millions of times every single day.
+- Vendor-neutral and community-driven.
+- Widely adopted by the industry: Trusted by major companies like AWS, Microsoft,
+  Google Cloud, and hundreds of others.
+
+The projects have many similarities: [Fluent Bit](https://fluentbit.io) is
+designed and built on top of the best ideas of [Fluentd](https://www.fluentd.org)
+architecture and general design. Which one you choose depends on your end users' needs.

 The following table describes a comparison of different areas of the projects:

-| | Fluentd | Fluent Bit |
-| ------------ | ----------------------------------------------------------------- | ----------------------------------------------------------------- |
-| Scope | Containers / Servers | Embedded Linux / Containers / Servers |
-| Language | C & Ruby | C |
-| Memory | > 60MB | \~1MB |
-| Performance | Medium Performance | High Performance |
-| Dependencies | Built as a Ruby Gem, it requires a certain number of gems. | Zero dependencies, unless some special plugin requires them. |
-| Plugins | More than 1000 external plugins are available | More than 100 built-in plugins are available |
+| Attribute | Fluentd | Fluent Bit |
+| ------------ | --------------------- | --------------------- |
+| Scope | Containers / Servers | Embedded Linux / Containers / Servers |
+| Language | C & Ruby | C |
+| Memory | Greater than 60 MB | Approximately 1 MB |
+| Performance | Medium Performance | High Performance |
+| Dependencies | Built as a Ruby Gem, depends on other gems. | Zero dependencies, unless required by a plugin. |
+| Plugins | Over 1,000 external plugins available. | Over 100 built-in plugins available. |
 | License | [Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0) | [Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0) |

-Both [Fluentd](https://www.fluentd.org) and [Fluent Bit](https://fluentbit.io) can work as Aggregators or Forwarders, they both can complement each other or use them as standalone solutions.\
-\
-In the recent years, Cloud Providers switched from Fluentd to Fluent Bit for performance and compatibility reasons. Fluent Bit is now considered the **next generation** solution.
+Both [Fluentd](https://www.fluentd.org) and [Fluent Bit](https://fluentbit.io)
+can work as aggregators or forwarders, and can complement each other or be used
+as standalone solutions.
+
+In recent years, cloud providers have switched from Fluentd to Fluent Bit for
+performance and compatibility. Fluent Bit is now considered the next-generation solution. 
diff --git a/about/history.md index 780aa3ccc..0e71d209f 100644 --- a/about/history.md +++ b/about/history.md @@ -1,10 +1,20 @@ ---
+title: A brief history of Fluent Bit
 description: Every project has a story
 ---

-# A Brief History of Fluent Bit
+# A brief history of Fluent Bit

-On 2014, the [Fluentd](https://www.fluentd.org/) team at [Treasure Data](https://www.treasuredata.com/) was forecasting the need for a lightweight log processor for constraint environments like Embedded Linux and Gateways, the project aimed to be part of the Fluentd Ecosystem; at that moment, Eduardo created [Fluent Bit](https://fluentbit.io/), a new open source solution written from scratch available under the terms of the [Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0).\
+In 2014, the [Fluentd](https://www.fluentd.org/) team at
+[Treasure Data](https://www.treasuredata.com/) was forecasting the need for a
+lightweight log processor for constrained environments like embedded Linux and
+gateways. The project aimed to be part of the Fluentd ecosystem. At that moment,
+Eduardo Silva created [Fluent Bit](https://fluentbit.io/), a new open source solution,
+written from scratch and available under the terms of the
+[Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0).
-
-After the project was around for some time, it got more traction for normal Linux systems, also with the new containerized world, the Cloud Native community asked to extend the project scope to support more sources, filters, and destinations. Not so long after, Fluent Bit became one of the preferred solutions to solve the logging challenges in Cloud environments.
+After the project matured, it gained traction for normal Linux systems. With the
+new containerized world, the Cloud Native community asked to extend the
+project scope to support more sources, filters, and destinations. Not long after,
+Fluent Bit became one of the preferred solutions to solve the logging challenges
+in Cloud environments.
diff --git a/about/license.md index 44bb3506e..625714e45 100644 --- a/about/license.md +++ b/about/license.md @@ -1,10 +1,13 @@ ---
-description: Strong Commitment to the Openness and Collaboration
+title: License
+description: Fluent Bit license description
 ---

 # License

-[Fluent Bit](http://fluentbit.io), including its core, plugins and tools are distributed under the terms of the [Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0):
+[Fluent Bit](http://fluentbit.io), including its core, plugins, and tools, is
+distributed under the terms of the
+[Apache License v2.0](http://www.apache.org/licenses/LICENSE-2.0):

 ```text
     Apache License
@@ -184,4 +187,3 @@ description: Strong Commitment to the Openness and Collaboration
     END OF TERMS AND CONDITIONS
 ```
-
diff --git a/about/sandbox-and-lab-resources.md new file mode 100644 index 000000000..fe863a9ad --- /dev/null +++ b/about/sandbox-and-lab-resources.md @@ -0,0 +1,43 @@ +---
+description: >-
+  The following page gives an overview of free public resources for Sandbox and
+  Labs for learning how to best operate, use, and have success with Fluent Bit. 
+---
+
+# Sandbox and Lab Resources
+
+## Fluent Bit Sandbox - sign-up required
+
+The following labs run in your browser, but require email sign-up.
+
+<div data-full-width="true"><figure><img src="../.gitbook/assets/image (1).png" alt=""><figcaption></figcaption></figure></div>
+
+* [Fluent Bit 101 Sandbox - Getting Started with configuration and routing](https://play.instruqt.com/Fluent/invite/nuys5ifhsprt)
+
+## Open Source Labs - environment required
+
+The following are open source labs where you'll need to spin up your own resources to run through each lab in detail.
+
+### O11y Workshops by Chronosphere
+
+These open source workshops, provided by Chronosphere, are available at [https://o11y-workshops.gitlab.io/](https://o11y-workshops.gitlab.io/). The OSS repository is at [https://gitlab.com/o11y-workshops/workshop-fluentbit](https://gitlab.com/o11y-workshops/workshop-fluentbit).
+
+The following cards include links to each of the labs in the workshop.
+
+{% embed url="https://o11y-workshops.gitlab.io/workshop-fluentbit/" %}
+Fluent Bit Workshop for Getting Started with Cloud Native Telemetry Pipelines
+{% endembed %}
+
+1. [Lab 1 - Introduction to Fluent Bit](https://o11y-workshops.gitlab.io/workshop-fluentbit/lab01.html)
+2. [Lab 2 - Installing Fluent Bit](https://o11y-workshops.gitlab.io/workshop-fluentbit/#/4)
+3. [Lab 3 - Exploring First Pipelines](https://o11y-workshops.gitlab.io/workshop-fluentbit/#/5)
+4. [Lab 4 - Exploring More Pipelines](https://o11y-workshops.gitlab.io/workshop-fluentbit/#/6)
+5. [Lab 5 - Understanding Backpressure](https://o11y-workshops.gitlab.io/workshop-fluentbit/#/7)
+6. [Lab 6 - Avoid Telemetry Data Loss](https://o11y-workshops.gitlab.io/workshop-fluentbit/#/8)
+7. [Lab 7 - Pipeline Integration with OpenTelemetry](https://o11y-workshops.gitlab.io/workshop-fluentbit/#/9)
+
+### Logging with Fluent Bit and Amazon OpenSearch workshop by Amazon
+
+This workshop by Amazon walks through common Kubernetes logging patterns, routing data to OpenSearch, and visualizing it with OpenSearch dashboards.
+
+{% embed url="https://archive.eksworkshop.com/intermediate/230_logging/" %}
diff --git a/about/what-is-fluent-bit.md index afb7d5aeb..e0b4fd9d2 100644 --- a/about/what-is-fluent-bit.md +++ b/about/what-is-fluent-bit.md @@ -4,14 +4,24 @@ description: Fluent Bit is a CNCF sub-project under the umbrella of Fluentd

 # What is Fluent Bit?

+[Fluent Bit](https://fluentbit.io) is an open source telemetry agent specifically
+designed to efficiently handle the challenges of collecting and processing telemetry
+data across a wide range of environments, from constrained systems to complex cloud
+infrastructures. Managing telemetry data from various sources and formats can be a
+constant challenge, particularly when performance is a critical factor.
+
+Rather than serving as a drop-in replacement, Fluent Bit enhances the observability
+strategy for your infrastructure by adapting and optimizing your existing logging
+layer, and adding metrics and traces processing. Fluent Bit supports a
+vendor-neutral approach, seamlessly integrating with other ecosystems such as
+Prometheus and OpenTelemetry. Trusted by major cloud providers, banks, and companies
+in need of a ready-to-use telemetry agent solution, Fluent Bit effectively manages
+diverse data sources and formats while maintaining optimal performance and keeping
+resource consumption low. 
+ +Fluent Bit can be deployed as an edge agent for localized telemetry data handling or +utilized as a central aggregator/collector for managing telemetry data across +multiple sources and environments. -[Fluent Bit](https://fluentbit.io) is an open-source telemetry agent specifically designed to efficiently handle the challenges of collecting and processing telemetry data across a wide range of environments, from constrained systems to complex cloud infrastructures. Managing telemetry data from various sources and formats can be a constant challenge, particularly when performance is a critical factor. - -Rather than serving as a drop-in replacement, Fluent Bit enhances the observability strategy for your infrastructure by adapting and optimizing your existing logging layer, as well as metrics and traces processing. Furthermore, Fluent Bit supports a vendor-neutral approach, seamlessly integrating with other ecosystems such as Prometheus and OpenTelemetry. Trusted by major cloud providers, banks, and companies in need of a ready-to-use telemetry agent solution, Fluent Bit effectively manages diverse data sources and formats while maintaining optimal performance. - -Fluent Bit can be deployed as an edge agent for localized telemetry data handling or utilized as a central aggregator/collector for managing telemetry data across multiple sources and environments. - -[Fluent Bit](https://fluentbit.io) has been designed with performance and low resource consumption in mind. - {% embed url="https://www.youtube.com/watch?v=3ELc1helke4" %} diff --git a/administration/aws-credentials.md b/administration/aws-credentials.md index c59b48445..e4b21b1dd 100644 --- a/administration/aws-credentials.md +++ b/administration/aws-credentials.md @@ -1,39 +1,53 @@ # AWS Credentials -Plugins that interact with AWS services will fetch credentials from various providers in the following order. -Only the first provider that is able to provide credentials will be used. +Plugins that interact with AWS services fetch credentials from the following providers +in order. Only the first provider that provides credentials is used. -All AWS plugins additionally support a `role_arn` (or `AWS_ROLE_ARN`, for [Elasticsearch](../pipeline/outputs/elasticsearch.md)) configuration parameter. If specified, the fetched credentials will then be used to assume the given role. +- [Environment variables](#environment-variables) +- [Shared configuration and credentials files](#shared-configuration-and-credentials-files) +- [EKS Web Identity Token (OIDC)](#eks-web-identity-token-oidc) +- [ECS HTTP credentials endpoint](#ecs-http-credentials-endpoint) +- [EC2 Instance Profile Credentials (IMDS)](#ec2-instance-profile-credentials-imds) -## 1. Environment Variables +All AWS plugins additionally support a `role_arn` (or `AWS_ROLE_ARN`, for +[Elasticsearch](../pipeline/outputs/elasticsearch.md)) configuration parameter. If +specified, the fetched credentials are used to assume the given role. -Uses the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` (and optionally `AWS_SESSION_TOKEN`) environment variables if set. +## Environment variables -## 2. Shared Configuration and Credentials Files +Plugins use the `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` (and optionally +`AWS_SESSION_TOKEN`) environment variables if set. 
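+
+For example, a minimal sketch using AWS's documented example credentials (replace
+these placeholder values with your own):
+
+```bash
+export AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
+export AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+# Only required when using temporary credentials
+export AWS_SESSION_TOKEN=replace-with-your-session-token
+```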
-Reads the shared config file at `$AWS_CONFIG_FILE` (or `$HOME/.aws/config`) and the shared credentials file at `$AWS_SHARED_CREDENTIALS_FILE` (or `$HOME/.aws/credentials`) to fetch the credentials for the profile named `$AWS_PROFILE` or `$AWS_DEFAULT_PROFILE` (or "default"). See https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html. +## Shared configuration and credentials files -The shared settings will be evaluated in the following order. +Plugins read the shared `config` file at `$AWS_CONFIG_FILE` (or `$HOME/.aws/config`), +and the shared credentials file at `$AWS_SHARED_CREDENTIALS_FILE` (or +`$HOME/.aws/credentials`) to fetch the credentials for the profile named +`$AWS_PROFILE` or `$AWS_DEFAULT_PROFILE` (or "default"). See +[Configuration and credential file settings in the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html). -Setting|File|Description ----|---|--- -`credential_process`|config| See https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-sourcing-external.html.<br/>Supported on Linux only. -`aws_access_key_id`<br/>`aws_secret_access_key`<br/>*`aws_session_token`*|credentials|Access key ID and secret key to use to authenticate.<br/>The session token must be set for temporary credentials. +The shared settings evaluate in the following order: -At this time, no other settings are supported. +| Setting | File | Description | +|---|---|---| +| `credential_process` | `config` | Linux only. See [Sourcing credentials with an external process in the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-sourcing-external.html). | +| `aws_access_key_id`<br />`aws_secret_access_key`<br />`aws_session_token` | `credentials` | Access key ID and secret key to use to authenticate. The session token must be set for temporary credentials. | -## 3. EKS Web Identity Token (OIDC) +No other settings are supported. -Fetches credentials via a signed web identity token for a Kubernetes service account. -See https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html. +## EKS Web Identity Token (OIDC) -## 4. ECS HTTP Credentials Endpoint +Credentials are fetched using a signed web identity token for a Kubernetes service account. +See [IAM roles for service accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). -Fetches credentials for the ECS task's role. -See https://docs.aws.amazon.com/AmazonECS/latest/userguide/task-iam-roles.html. +## ECS HTTP credentials endpoint -## 5. EC2 Instance Profile Credentials (IMDS) +Credentials are fetched for the ECS task's role. See +[Amazon ECS task IAM role](https://docs.aws.amazon.com/AmazonECS/latest/userguide/task-iam-roles.html). -Fetches credentials for the EC2 instance profile's role. -See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html. -As of Fluent Bit version 1.8.8, IMDSv2 is used by default and IMDSv1 may be disabled. Prior versions of Fluent Bit require enabling IMDSv1 on EC2. \ No newline at end of file +## EC2 instance profile credentials (IMDS) + +Fetches credentials for the EC2 instance profile's role. See +[IAM roles for Amazon EC2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html). +As of Fluent Bit version 1.8.8, IMDSv2 is used by default and IMDSv1 might be disabled. +Prior versions of Fluent Bit require enabling IMDSv1 on EC2. 
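+
+As an illustration of the `role_arn` parameter described earlier, the following
+sketch (the ARN, names, and region are placeholders) assumes a role using whichever
+base credentials the providers above supply:
+
+```python
+[OUTPUT]
+    Name             cloudwatch_logs
+    Match            *
+    region           us-east-1
+    log_group_name   fluent-bit-example-group
+    log_stream_name  fluent-bit-example-stream
+    # Assume this role with the credentials fetched by the providers above
+    role_arn         arn:aws:iam::123456789012:role/example-fluent-bit-role
+```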
diff --git a/administration/backpressure.md b/administration/backpressure.md index 14759f256..8cfa03723 100644 --- a/administration/backpressure.md +++ b/administration/backpressure.md @@ -1,67 +1,120 @@ # Backpressure -Under certain scenarios it is possible for logs or data to be ingested or created faster than the ability to flush it to some destinations. One such common scenario is when reading from big log files, especially with a large backlog, and dispatching the logs to a backend over the network, which takes time to respond. This generates backpressure leading to high memory consumption in the service. - -In order to avoid backpressure, Fluent Bit implements a mechanism in the engine that restricts the amount of data that an input plugin can ingest, this is done through the configuration parameters **Mem\_Buf\_Limit** and **storage.Max\_Chunks\_Up**. - -As described in the [Buffering](../concepts/buffering.md) concepts section, Fluent Bit offers two modes for data handling: in-memory only (default) and in-memory + filesystem \(optional\). - -The default `storage.type memory` buffer can be restricted with **Mem\_Buf\_Limit**. If memory reaches this limit and you reach a backpressure scenario, you will not be able to ingest more data until the data chunks that are in memory can be flushed. The input will be paused and Fluent Bit will [emit](https://github.com/fluent/fluent-bit/blob/v2.0.0/src/flb_input_chunk.c#L1334) a `[warn] [input] {input name or alias} paused (mem buf overlimit)` log message. Depending on the input plugin in use, this might lead to discard incoming data \(e.g: TCP input plugin\). The tail plugin can handle pause without data loss; it will store its current file offset and resume reading later. When buffer memory is available, the input will resume collecting/accepting logs and Fluent Bit will [emit](https://github.com/fluent/fluent-bit/blob/v2.0.0/src/flb_input_chunk.c#L1277) a `[info] [input] {input name or alias} resume (mem buf overlimit)` message. - -This risk of data loss can be mitigated by configuring secondary storage on the filesystem using the `storage.type` of `filesystem` \(as described in [Buffering & Storage](buffering-and-storage.md)\). Initially, logs will be buffered to *both* memory and filesystem. When the `storage.max_chunks_up` limit is reached, all the new data will be stored safely only in the filesystem. Fluent Bit will stop enqueueing new data in memory and will only buffer to the filesystem. Please note that when `storage.type filesystem` is set, the `Mem_Buf_Limit` setting no longer has any effect, instead, the `[SERVICE]` level `storage.max_chunks_up` setting controls the size of the memory buffer. - -## Mem\_Buf\_Limit - -This option is disabled by default and can be applied to all input plugins. Please note that `Mem_Buf_Limit` only applies with the default `storage.type memory`. Let's explain its behavior using the following scenario: - -* Mem\_Buf\_Limit is set to 1MB \(one megabyte\) -* input plugin tries to append 700KB -* engine route the data to an output plugin -* output plugin backend \(HTTP Server\) is down -* engine scheduler will retry the flush after 10 seconds -* input plugin tries to append 500KB - -At this exact point, the engine will **allow** appending those 500KB of data into the memory; in total it will have 1.2MB of data buffered. 
The limit is permissive and will allow a single write past the limit, but once the limit is **exceeded** the following actions are taken: - -* block local buffers for the input plugin \(cannot append more data\) -* notify the input plugin invoking a **pause** callback - -The engine will protect itself and will not append more data coming from the input plugin in question; note that it is the responsibility of the plugin to keep state and decide what to do in that _paused_ state. - -After some time, usually measured in seconds, if the scheduler was able to flush the initial 700KB of data or it has given up after retrying, that amount of memory is released and the following actions will occur: - -* Upon data buffer release \(700KB\), the internal counters get updated -* Counters now are set at 500KB -* Since 500KB is < 1MB it checks the input plugin state -* If the plugin is paused, it invokes a **resume** callback -* input plugin can continue appending more data - -## storage.max\_chunks\_up - -Please note that when `storage.type filesystem` is set, the `Mem_Buf_Limit` setting no longer has any effect, instead, the `[SERVICE]` level `storage.max_chunks_up` setting controls the size of the memory buffer. - -The setting behaves similarly to the above scenario with `Mem_Buf_Limit` when the non-default `storage.pause_on_chunks_overlimit` is enabled. - -When (default) `storage.pause_on_chunks_overlimit` is disabled, the input will not pause when the memory limit is reached. Instead, it will switch to only buffering logs in the filesystem. The disk spaced used for filesystem buffering can be limited with `storage.total_limit_size`. - -Please consule the [Buffering & Storage](buffering-and-storage.md) docs for more information. - -## About pause and resume Callbacks - -Each plugin is independent and not all of them implements the **pause** and **resume** callbacks. As said, these callbacks are just a notification mechanism for the plugin. - -One example of a plugin that implements these callbacks and keeps state correctly is the [Tail Input](../pipeline/inputs/tail.md) plugin. When the **pause** callback is triggered, it pauses its collectors and stops appending data. Upon **resume**, it resumes the collectors and continues ingesting data. Tail will track the current file offset when it pauses and resume at the same position. If the file has not been deleted or moved, it can still be read. - -With the default `storage.type memory` and `Mem_Buf_Limit`, the following log messages will be emitted for pause and resume: - -``` +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=63e37cfe-9ce3-4a18-933a-76b9198958c1" /> + +It's possible for logs or data to be ingested or created faster than the ability to +flush it to some destinations. A common scenario is when reading from big log files, +especially with a large backlog, and dispatching the logs to a backend over the +network, which takes time to respond. This generates _backpressure_, leading to high +memory consumption in the service. + +To avoid backpressure, Fluent Bit implements a mechanism in the engine that restricts +the amount of data an input plugin can ingest. Restriction is done through the +configuration parameters `Mem_Buf_Limit` and `storage.Max_Chunks_Up`. + +As described in the [Buffering](../concepts/buffering.md) concepts section, Fluent +Bit offers two modes for data handling: in-memory only (default) and in-memory and +filesystem (optional). 
+
+The default `storage.type memory` buffer can be restricted with `Mem_Buf_Limit`. If
+memory reaches this limit and you reach a backpressure scenario, you won't be able
+to ingest more data until the data chunks that are in memory can be flushed. The
+input pauses and Fluent Bit
+[emits](https://github.com/fluent/fluent-bit/blob/v2.0.0/src/flb_input_chunk.c#L1334)
+a `[warn] [input] {input name or alias} paused (mem buf overlimit)` log message.
+
+Depending on the input plugin in use, this might cause incoming data to be discarded
+(for example, the TCP input plugin). The tail plugin can handle pauses without data
+loss, storing its current file offset and resuming reading later. When buffer
+memory is available, the input resumes accepting logs. Fluent Bit
+[emits](https://github.com/fluent/fluent-bit/blob/v2.0.0/src/flb_input_chunk.c#L1277)
+a `[info] [input] {input name or alias} resume (mem buf overlimit)` message.
+
+Mitigate the risk of data loss by configuring secondary storage on the filesystem
+using the `storage.type` of `filesystem` (as described in [Buffering &
+Storage](buffering-and-storage.md)). Initially, logs will be buffered to both memory
+and the filesystem. When the `storage.max_chunks_up` limit is reached, all new data
+will be stored in the filesystem. Fluent Bit stops queueing new data in memory and
+buffers only to the filesystem. When `storage.type filesystem` is set, the
+`Mem_Buf_Limit` setting no longer has any effect. Instead, the `[SERVICE]` level
+`storage.max_chunks_up` setting controls the size of the memory buffer.
+
+## `Mem_Buf_Limit`
+
+`Mem_Buf_Limit` applies only with the default `storage.type memory`. This option is
+disabled by default and can be applied to all input plugins.
+
+As an example situation:
+
+- `Mem_Buf_Limit` is set to `1MB`.
+- The input plugin tries to append 700 KB.
+- The engine routes the data to an output plugin.
+- The output plugin backend (HTTP Server) is down.
+- The engine scheduler retries the flush after 10 seconds.
+- The input plugin tries to append 500 KB.
+
+In this situation, the engine allows appending those 500 KB of data into memory,
+with a total of 1.2 MB of data buffered. The limit is permissive and will
+allow a single write past the limit. When the limit is exceeded, the following
+actions are taken:
+
+- Block local buffers for the input plugin (can't append more data).
+- Notify the input plugin, invoking a `pause` callback.
+
+The engine protects itself and won't append more data coming from the input plugin in
+question. It's the responsibility of the plugin to keep state and decide what to do
+in a `paused` state.
+
+Within a few seconds, if the scheduler was able to flush the initial 700 KB of data or
+has given up after retrying, that amount of memory is released and the following
+actions occur:
+
+- Upon data buffer release (700 KB), the internal counters get updated.
+- Counters now are set at 500 KB.
+- Because 500 KB is less than 1 MB, it checks the input plugin state.
+- If the plugin is paused, it invokes a `resume` callback.
+- The input plugin can continue appending more data.
+
+## `storage.max_chunks_up`
+
+The `[SERVICE]` level `storage.max_chunks_up` setting controls the size of the memory
+buffer. When `storage.type filesystem` is set, the `Mem_Buf_Limit` setting no longer
+has an effect.
+
+The setting behaves similarly to the `Mem_Buf_Limit` scenario when the non-default
+`storage.pause_on_chunks_overlimit` is enabled. 
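+
+For example, the following minimal sketch (the paths and values here are placeholders,
+not defaults) buffers a `tail` input to the filesystem and pauses the input when the
+in-memory chunk limit is reached:
+
+```python
+[SERVICE]
+    flush                    1
+    # Filesystem buffering location and in-memory chunk limit
+    storage.path             /var/log/flb-storage/
+    storage.max_chunks_up    128
+
+[INPUT]
+    name                               tail
+    path                               /var/log/app/*.log
+    # Buffer chunks on disk and pause ingestion when the in-memory chunk limit is reached
+    storage.type                       filesystem
+    storage.pause_on_chunks_overlimit  on
+```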
+
+When (default) `storage.pause_on_chunks_overlimit` is disabled, the input won't pause
+when the memory limit is reached. Instead, it switches to buffering logs only in
+the filesystem. Limit the disk space used for filesystem buffering with
+`storage.total_limit_size`.
+
+See the [Buffering & Storage](buffering-and-storage.md) docs for more information.
+
+## About pause and resume callbacks
+
+Each plugin is independent and not all of them implement `pause` and `resume`
+callbacks. These callbacks are a notification mechanism for the plugin.
+
+One example of a plugin that implements these callbacks and keeps state correctly is
+the [Tail Input](../pipeline/inputs/tail.md) plugin. When the `pause` callback
+triggers, it pauses its collectors and stops appending data. Upon `resume`, it
+resumes the collectors and continues ingesting data. Tail tracks the current file
+offset when it pauses, and resumes at the same position. If the file hasn't been
+deleted or moved, it can still be read.
+
+With the default `storage.type memory` and `Mem_Buf_Limit`, the following log
+messages emit for `pause` and `resume`:
+
+```text
 [warn] [input] {input name or alias} paused (mem buf overlimit)
 [info] [input] {input name or alias} resume (mem buf overlimit)
 ```

-With `storage.type filesystem` and `storage.max_chunks_up`, the following log messages will be emitted for pause and resume:
+With `storage.type filesystem` and `storage.max_chunks_up`, the following log
+messages emit for `pause` and `resume`:

-```
-[input] {input name or alias} paused (storage buf overlimit
-[input] {input name or alias} resume (storage buf overlimit
+```text
+[input] {input name or alias} paused (storage buf overlimit)
+[input] {input name or alias} resume (storage buf overlimit)
 ```
diff --git a/administration/buffering-and-storage.md index 1b373bf64..32dc8d657 100644 --- a/administration/buffering-and-storage.md +++ b/administration/buffering-and-storage.md @@ -1,51 +1,92 @@ # Buffering & Storage

-The end-goal of [Fluent Bit](https://fluentbit.io) is to collect, parse, filter and ship logs to a central place. In this workflow there are many phases and one of the critical pieces is the ability to do _buffering_ : a mechanism to place processed data into a temporary location until is ready to be shipped.
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=cde12327-09ed-409c-ac02-7c0afa5eff51" />

-By default when Fluent Bit processes data, it uses Memory as a primary and temporary place to store the records, but there are certain scenarios where it would be ideal to have a persistent buffering mechanism based in the filesystem to provide aggregation and data safety capabilities.
+[Fluent Bit](https://fluentbit.io) collects, parses, filters, and ships logs to a
+central place. A critical piece of this workflow is the ability to do _buffering_: a
+mechanism to place processed data into a temporary location until it's ready to be
+shipped.

-Choosing the right configuration is critical and the behavior of the service can be conditioned based in the backpressure settings. Before we jump into the configuration let's make sure we understand the relationship between _Chunks_, _Memory_, _Filesystem_ and _Backpressure_.
+By default when Fluent Bit processes data, it uses Memory as a primary and temporary
+place to store the records. 
There are scenarios where it would be ideal +to have a persistent buffering mechanism based in the filesystem to provide +aggregation and data safety capabilities. -## Chunks, Memory, Filesystem and Backpressure +Choosing the right configuration is critical and the behavior of the service can be +conditioned based in the backpressure settings. Before jumping into the configuration +it helps to understand the relationship between _chunks_, _memory_, +_filesystem_, and _backpressure_. -Understanding the chunks, buffering and backpressure concepts is critical for a proper configuration. Let's do a recap of the meaning of these concepts. +## Chunks, memory, filesystem, and backpressure -#### Chunks +Understanding chunks, buffering, and backpressure is critical for a proper +configuration. -When an input plugin \(source\) emits records, the engine groups the records together in a _Chunk_. A Chunk size usually is around 2MB. By configuration, the engine decides where to place this Chunk, the default is that all chunks are created only in memory. +### Backpressure -#### Irrecoverable Chunks +See [Backpressure](https://docs.fluentbit.io/manual/administration/backpressure) +for a full explanation. -There are two scenarios where fluent-bit marks chunks as irrecoverable: +### Chunks -* When Fluent Bit encounters a bad layout in a chunk. A bad layout is a chunk that does not conform to the expected format. [Chunk definition](https://github.com/fluent/fluent-bit/blob/master/CHUNKS.md) +When an input plugin source emits records, the engine groups the records together +in a _chunk_. A chunk's size usually is around 2 MB. By configuration, the engine +decides where to place this chunk. By default, all chunks are created only in +memory. -* When Fluent Bit encounters an incorrect or invalid chunk header size. +### Irrecoverable chunks -In both scenarios Fluent-Bit will log an error message and then discard the irrecoverable chunks. +There are two scenarios where Fluent Bit marks chunks as irrecoverable: -#### Buffering and Memory +- When Fluent Bit encounters a bad layout in a chunk. A bad layout is a chunk that + doesn't conform to the expected format. + [Chunk definition](https://github.com/fluent/fluent-bit/blob/master/CHUNKS.md) -As mentioned above, the Chunks generated by the engine are placed in memory but this is configurable. +- When Fluent Bit encounters an incorrect or invalid chunk header size. -If memory is the only mechanism set for the input plugin, it will just store data as much as it can there \(memory\). This is the fastest mechanism with the least system overhead, but if the service is not able to deliver the records fast enough because of a slow network or an unresponsive remote service, Fluent Bit memory usage will increase since it will accumulate more data than it can deliver. +In both scenarios Fluent Bit logs an error message and then discards the +irrecoverable chunks. -In a high load environment with backpressure the risks of having high memory usage is the chance of getting killed by the Kernel \(OOM Killer\). A workaround for this backpressure scenario is to limit the amount of memory in records that an input plugin can register, this configuration property is called `mem_buf_limit`. If a plugin has enqueued more than the `mem_buf_limit`, it won't be able to ingest more until that data can be delivered or flushed properly. In this scenario the input plugin in question is paused. When the input is paused, records will not be ingested until it is resumed. 
For some inputs, such as TCP and tail, pausing the input will almost certainly lead to log loss. For the tail input, Fluent Bit can save its current offset in the current file it is reading, and pick back up when the input is resumed. +#### Buffering and memory + +As mentioned previously, chunks generated by the engine are placed in memory by +default, but this is configurable. + +If memory is the only mechanism set for the input plugin, it will store as much data +as possible in memory. This is the fastest mechanism with the least system +overhead. However, if the service isn't able to deliver the records fast enough, +Fluent Bit memory usage increases as it accumulates more data than it can deliver. + +In a high load environment with backpressure, having high memory usage risks getting +killed by the kernel's OOM Killer. To work around this backpressure scenario, +limit the amount of memory in records that an input plugin can register using the +`mem_buf_limit` property. If a +plugin has queued more than the `mem_buf_limit`, it won't be able to ingest more +until that data can be delivered or flushed properly. In this scenario the input +plugin in question is paused. When the input is paused, records won't be ingested +until the plugin resumes. For some inputs, such as TCP and tail, pausing the input will +almost certainly lead to log loss. For the tail input, Fluent Bit can save its +current offset in the current file it's reading, and pick back up when the input +resumes. Look for messages in the Fluent Bit log output like: -``` +```text [input] tail.1 paused (mem buf overlimit) [input] tail.1 resume (mem buf overlimit) ``` -The workaround of `mem_buf_limit` is good for certain scenarios and environments, it helps to control the memory usage of the service, but at the costs that if a file gets rotated while paused, you might lose that data since it won't be able to register new records. This can happen with any input source plugin. The goal of `mem_buf_limit` is memory control and survival of the service. +Using `mem_buf_limit` is good for certain scenarios and environments. It +helps to control the memory usage of the service. However, if a file rotates while +the plugin is paused, data can be lost since it won't be able to +register new records. This can happen with any input source plugin. The goal of +`mem_buf_limit` is memory control and survival of the service. -For full data safety guarantee, use filesystem buffering. +For a full data safety guarantee, use filesystem buffering. Here is an example input definition: -``` +```python [INPUT] Name tcp Listen 0.0.0.0 @@ -55,75 +96,115 @@ Here is an example input definition: Mem_Buf_Limit 50MB ``` -If this input uses more than 50MB memory to buffer logs, you will get a warning like this in the Fluent Bit logs: -``` +If this input uses more than 50 MB memory to buffer logs, you will get a warning like +this in the Fluent Bit logs: + +```text [input] tcp.1 paused (mem buf overlimit) ``` {% hint style="info" %} -`Mem_Buf_Limit` applies only when `storage.type` is set to the default value of +`mem_buf_Limit` applies only when `storage.type` is set to the default value of `memory`. {% endhint %} -The following section explains the applicable limits when you enable -`storage.type filesystem`. - -#### Filesystem buffering to the rescue - -Filesystem buffering enabled helps with backpressure and overall memory control. - -Behind the scenes, Memory and Filesystem buffering mechanisms are **not** mutually exclusive. 
Indeed when enabling filesystem buffering for your input plugin \(source\) you are getting the best of the two worlds: performance and data safety. - -When Filesystem buffering is enabled, the behavior of the engine is different. Upon Chunk creation, the engine stores the content in memory and also maps a copy on disk \(through [mmap\(2\)](https://man7.org/linux/man-pages/man2/mmap.2.html)\). The newly created Chunk is (1) active in memory, (2) backed up on disk, and (3) is called to be `up` which means "the chunk content is up in memory". - -How does the Filesystem buffering mechanism deal with high memory usage and backpressure? Fluent Bit controls the number of Chunks that are `up` in memory. - -By default, the engine allows us to have 128 Chunks `up` in memory in total \(considering all Chunks\), this value is controlled by service property `storage.max_chunks_up`. The active Chunks that are `up` are ready for delivery and the ones that are still receiving records. Any other remaining Chunk is in a `down` state, which means that it is only in the filesystem and won't be `up` in memory unless it is ready to be delivered. Remember, chunks are never much larger than 2 MB, thus, with the default `storage.max_chunks_up` value of 128, each input is limited to roughly 256 MB of memory. - -If the input plugin has enabled `storage.type` as `filesystem`, when reaching the `storage.max_chunks_up` threshold, instead of the plugin being paused, all new data will go to Chunks that are `down` in the filesystem. This allows us to control the memory usage by the service and also provides a guarantee that the service won't lose any data. By default, the enforcement of the `storage.max_chunks_up` limit is best-effort. Fluent Bit can only append new data to chunks that are `up`; when the limit is reached chunks will be temporarily brought `up` in memory to ingest new data, and then put to a `down` state afterwards. In general, Fluent Bit will work to keep the total number of `up` chunks at or below `storage.max_chunks_up`. - -If `storage.pause_on_chunks_overlimit` is enabled (default is off), the input plugin will be paused upon exceeding `storage.max_chunks_up`. Thus, with this option, `storage.max_chunks_up` becomes a hard limit for the input. When the input is paused, records will not be ingested until it is resumed. For some inputs, such as TCP and tail, pausing the input will almost certainly lead to log loss. For the tail input, Fluent Bit can save its current offset in the current file it is reading, and pick back up when the input is resumed. +#### Filesystem buffering + +Filesystem buffering helps with backpressure and overall memory control. Enable it +using `storage.type filesystem`. + +Memory and filesystem buffering mechanisms aren't mutually exclusive. Enabling +filesystem buffering for your input plugin source can improve both performance and +data safety. + +Enabling filesystem buffering changes the behavior of the engine. Upon chunk +creation, the engine stores the content in memory and also maps a copy on disk +through [mmap(2)](https://man7.org/linux/man-pages/man2/mmap.2.html). The newly +created chunk is active in memory, backed up on disk, and called to be +`up`, which means the chunk content is up in memory. + +Fluent Bit controls the number of chunks that are `up` in memory by using the +filesystem buffering mechanism to deal with high memory usage and +backpressure. + +By default, the engine allows a total of 128 chunks `up` in memory in total, +considering all chunks. 
This value is controlled by the service property +`storage.max_chunks_up`. The active chunks that are `up` are ready for delivery +and are still receiving records. Any other remaining chunk is in a `down` +state, which means that it's only in the filesystem and won't be `up` in memory +unless it's ready to be delivered. Chunks are never much larger than 2 MB, +so with the default `storage.max_chunks_up` value of 128, each input is limited to +roughly 256 MB of memory. + +If the input plugin has enabled `storage.type` as `filesystem`, when reaching the +`storage.max_chunks_up` threshold, instead of the plugin being paused, all new data +will go to chunks that are `down` in the filesystem. This lets you control +memory usage by the service and also provides a guarantee that the service won't lose +any data. By default, the enforcement of the `storage.max_chunks_up` limit is +best-effort. Fluent Bit can only append new data to chunks that are `up`. When the +limit is reached chunks will be temporarily brought `up` in memory to ingest new +data, and then put to a `down` state afterwards. In general, Fluent Bit works to +keep the total number of `up` chunks at or below `storage.max_chunks_up`. + +If `storage.pause_on_chunks_overlimit` is enabled (default is off), the input plugin +pauses upon exceeding `storage.max_chunks_up`. With this option, +`storage.max_chunks_up` becomes a hard limit for the input. When the input is paused, +records won't be ingested until the plugin resumes. For some inputs, such as TCP and +tail, pausing the input will almost certainly lead to log loss. For the tail input, +Fluent Bit can save its current offset in the current file it's reading, and pick +back up when the input is resumed. Look for messages in the Fluent Bit log output like: -``` +```text [input] tail.1 paused (storage buf overlimit [input] tail.1 resume (storage buf overlimit ``` -**Limiting Filesystem space for Chunks** +##### Limiting filesystem space for chunks -Fluent Bit implements the concept of logical queues: based on its Tag, a Chunk can be routed to multiple destinations. Thus, we keep an internal reference from where a Chunk was created and where it needs to go. +Fluent Bit implements the concept of logical queues. Based on its tag, a chunk can be +routed to multiple destinations. Fluent Bit keeps an internal reference from where a +chunk was created and where it needs to go. -It's common to find cases where if we have multiple destinations for a Chunk, one of the destinations might be slower than the other, or maybe one is generating backpressure and not all of them. In this scenario, how do we limit the amount of filesystem Chunks that we are logically queueing? +It's common to find cases where multiple destinations with different response times +exist for a chunk, or one of the destinations is generating backpressure. -Starting from Fluent Bit v1.6, we introduced the new configuration property for output plugins called `storage.total_limit_size` which limits the total size in bytes of chunks that can exist in the filesystem for a certain logical output destination. If one of the destinations reaches the configured `storage.total_limit_size`, the oldest Chunk from its queue for that logical output destination will be discarded to make room for new data. 
+To limit the number of filesystem chunks logically queued, Fluent Bit v1.6 and
+later includes the `storage.total_limit_size` configuration property for output
+plugins. This property limits the total size in bytes of chunks that can exist in the
+filesystem for a certain logical output destination. If one of the destinations
+reaches the configured `storage.total_limit_size`, the oldest chunk from its queue
+for that logical output destination will be discarded to make room for new data.
 
 ## Configuration
 
-The storage layer configuration takes place in three areas:
+The storage layer configuration takes place in three sections:
 
-* Service Section
-* Input Section
-* Output Section
+- Service
+- Input
+- Output
 
-The known Service section configures a global environment for the storage layer, the Input sections define which buffering mechanism to use and the output the limits for the logical filesystem queues.
+The Service section configures a global environment for the storage layer, the
+Input sections define which buffering mechanism to use, and the Output sections
+define limits for the logical filesystem queues.
 
-### Service Section Configuration
+### Service section configuration
 
-The Service section refers to the section defined in the main [configuration file](configuring-fluent-bit/classic-mode/configuration-file.md):
+The Service section refers to the section defined in the main
+[configuration file](configuring-fluent-bit/classic-mode/configuration-file.md):
 
| Key | Description | Default |
| :--- | :--- | :--- |
-| storage.path | Set an optional location in the file system to store streams and chunks of data. If this parameter is not set, Input plugins can only use in-memory buffering. | |
-| storage.sync | Configure the synchronization mode used to store the data into the file system. It can take the values _normal_ or _full_. Using _full_ increases the reliability of the filesystem buffer and ensures that data is guaranteed to be synced to the filesystem even if Fluent Bit crashes. On linux, _full_ corresponds with the `MAP_SYNC` option for [memory mapped files](https://man7.org/linux/man-pages/man2/mmap.2.html). | normal |
-| storage.checksum | Enable the data integrity check when writing and reading data from the filesystem. The storage layer uses the CRC32 algorithm. | Off |
-| storage.max\_chunks\_up | If the input plugin has enabled `filesystem` storage type, this property sets the maximum number of Chunks that can be `up` in memory. *This is the setting to use to control memory usage when you enable `storage.type filesystem`*. | 128 |
-| storage.backlog.mem\_limit | If _storage.path_ is set, Fluent Bit will look for data chunks that were not delivered and are still in the storage layer, these are called _backlog_ data. _Backlog chunks_ are filesystem chunks that were left over from a previous Fluent Bit run; chunks that could not be sent before exit that Fluent Bit will pick up when restarted. Fluent Bit will check the `storage.backlog.mem_limit` value against the current memory usage from all `up` chunks for the input. If the `up` chunks currently consume less memory than the limit, it will bring the _backlog_ chunks up into memory so they can be sent by outputs. | 5M |
-| storage.metrics | If `http_server` option has been enabled in the main `[SERVICE]` section, this option registers a new endpoint where internal metrics of the storage layer can be consumed. For more details refer to the [Monitoring](monitoring.md) section.
| off | -| storage.delete_irrecoverable_chunks | When enabled, [irrecoverable chunks](./buffering-and-storage.md#irrecoverable-chunks) will be deleted during runtime, and any other irrecoverable chunk located in the configured storage path directory will be deleted when Fluent-Bit starts. | Off | +| `storage.path` | Set an optional location in the file system to store streams and chunks of data. If this parameter isn't set, Input plugins can only use in-memory buffering. | _none_ | +| `storage.sync` | Configure the synchronization mode used to store the data in the file system. Using `full` increases the reliability of the filesystem buffer and ensures that data is guaranteed to be synced to the filesystem even if Fluent Bit crashes. On Linux, `full` corresponds with the `MAP_SYNC` option for [memory mapped files](https://man7.org/linux/man-pages/man2/mmap.2.html). Accepted values: `normal`, `full`. | `normal` | +| `storage.checksum` | Enable the data integrity check when writing and reading data from the filesystem. The storage layer uses the CRC32 algorithm. Accepted values: `Off`, `On`. | `Off` | +| `storage.max_chunks_up` | If the input plugin has enabled `filesystem` storage type, this property sets the maximum number of chunks that can be `up` in memory. Use this setting to control memory usage when you enable `storage.type filesystem`. | `128` | +| `storage.backlog.mem_limit` | If `storage.path` is set, Fluent Bit looks for data chunks that weren't delivered and are still in the storage layer. These are called _backlog_ data. _Backlog chunks_ are filesystem chunks that were left over from a previous Fluent Bit run; chunks that couldn't be sent before exit that Fluent Bit will pick up when restarted. Fluent Bit will check the `storage.backlog.mem_limit` value against the current memory usage from all `up` chunks for the input. If the `up` chunks currently consume less memory than the limit, it will bring the _backlog_ chunks up into memory so they can be sent by outputs. | `5M` | +| `storage.metrics` | If `http_server` option is enabled in the main `[SERVICE]` section, this option registers a new endpoint where internal metrics of the storage layer can be consumed. For more details refer to the [Monitoring](monitoring.md) section. | `off` | +| `storage.delete_irrecoverable_chunks` | When enabled, [irrecoverable chunks](./buffering-and-storage.md#irrecoverable-chunks) will be deleted during runtime, and any other irrecoverable chunk located in the configured storage path directory will be deleted when Fluent-Bit starts. Accepted values: 'Off`, 'On`. | `Off` | -a Service section will look like this: +A Service section will look like this: ```python [SERVICE] @@ -135,18 +216,23 @@ a Service section will look like this: storage.backlog.mem_limit 5M ``` -that configuration sets an optional buffering mechanism where the route to the data is _/var/log/flb-storage/_, it will use _normal_ synchronization mode, without running a checksum and up to a maximum of 5MB of memory when processing backlog data. +This configuration sets an optional buffering mechanism where the route to the data +is `/var/log/flb-storage/`. It uses `normal` synchronization mode, without +running a checksum and up to a maximum of 5 MB of memory when processing backlog data. ### Input Section Configuration -Optionally, any Input plugin can configure their storage preference, the following table describes the options available: +Optionally, any Input plugin can configure their storage preference. 
The following +table describes the options available: | Key | Description | Default | | :--- | :--- | :--- | -| storage.type | Specifies the buffering mechanism to use. It can be _memory_ or _filesystem_. | memory | -| storage.pause_on_chunks_overlimit | Specifies if the input plugin should be paused (stop ingesting new data) when the `storage.max_chunks_up` value is reached. | off | +| `storage.type` | Specifies the buffering mechanism to use. Accepted values: `memory`, `filesystem`. | `memory` | +| `storage.pause_on_chunks_overlimit` | Specifies if the input plugin should pause (stop ingesting new data) when the `storage.max_chunks_up` value is reached. |`off` | -The following example configures a service that offers filesystem buffering capabilities and two Input plugins being the first based in filesystem and the second with memory only. +The following example configures a service offering filesystem buffering +capabilities and two input plugins being the first based in filesystem and the second +with memory only. ```python [SERVICE] @@ -169,15 +255,19 @@ The following example configures a service that offers filesystem buffering capa ### Output Section Configuration -If certain chunks are filesystem _storage.type_ based, it's possible to control the size of the logical queue for an output plugin. The following table describes the options available: +If certain chunks are filesystem `storage.type` based, it's possible to control the +size of the logical queue for an output plugin. The following table describes the +options available: | Key | Description | Default | | :--- | :--- | :--- | -| storage.total\_limit\_size | Limit the maximum disk space size in bytes for buffering chunks in the filesystem for the current output logical destination. | | +| `storage.total_limit_size` | Limit the maximum disk space size in bytes for buffering chunks in the filesystem for the current output logical destination. | _none_ | -The following example create records with CPU usage samples in the filesystem and then they are delivered to Google Stackdriver service limiting the logical queue \(buffering\) to 5M: +The following example creates records with CPU usage samples in the filesystem which +are delivered to Google Stackdriver service while limiting the logical queue +(buffering) to `5M`: -```text +```python [SERVICE] flush 1 log_Level info @@ -189,7 +279,7 @@ The following example create records with CPU usage samples in the filesystem an [INPUT] name cpu - storage.type filesystem + storage.type filesystem [OUTPUT] name stackdriver @@ -197,5 +287,5 @@ The following example create records with CPU usage samples in the filesystem an storage.total_limit_size 5M ``` -If for some reason Fluent Bit gets offline because of a network issue, it will continue buffering CPU samples but just keep a maximum of 5MB of the newest data. - +If Fluent Bit is offline because of a network issue, it will continue buffering CPU +samples, keeping a maximum of 5 MB of the newest data. diff --git a/administration/configuring-fluent-bit/README.md b/administration/configuring-fluent-bit/README.md index e1a73a507..29498ed79 100644 --- a/administration/configuring-fluent-bit/README.md +++ b/administration/configuring-fluent-bit/README.md @@ -2,12 +2,12 @@ Currently, Fluent Bit supports two configuration formats: -* [Classic mode](classic-mode/README.md). -* [Yaml](yaml/README.md). (YAML configuration is production ready since Fluent Bit 2.0.) +* [Yaml](yaml/README.md): standard configuration format as of v3.2. 
+* [Classic mode](classic-mode/README.md): to be deprecated at the end of 2025. -## CLI flags +## Command line interface -Fluent Bit also supports a CLI interface with various flags matching up to the configuration options available. +Fluent Bit exposes most of it features through the command line interface. Running the `-h` option you can get a list of the options available: ```shell $ docker run --rm -it fluent/fluent-bit --help diff --git a/administration/configuring-fluent-bit/classic-mode/configuration-file.md b/administration/configuring-fluent-bit/classic-mode/configuration-file.md index 60edb21e2..431866276 100644 --- a/administration/configuring-fluent-bit/classic-mode/configuration-file.md +++ b/administration/configuring-fluent-bit/classic-mode/configuration-file.md @@ -1,47 +1,51 @@ --- -description: This page describes the main configuration file used by Fluent Bit +description: This page describes the main configuration file used by Fluent Bit. --- -# Configuration File +# Configuration file -One of the ways to configure Fluent Bit is using a main configuration file. Fluent Bit allows to use one configuration file which works at a global scope and uses the [Format and Schema](format-schema.md) defined previously. +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=5e67142e-3887-4b56-b940-18494bcc23a7" /> -The main configuration file supports four types of sections: +One of the ways to configure Fluent Bit is using a main configuration file. Fluent +Bit allows the use one configuration file that works at a global scope and uses the +defined [Format and Schema](format-schema.md). -* Service -* Input -* Filter -* Output +The main configuration file supports four sections: -In addition, it's also possible to split the main configuration file in multiple files using the feature to include external files: +- Service +- Input +- Filter +- Output -* Include File +It's also possible to split the main configuration file into multiple files using +the Include File feature to include external files. -## Service <a href="config_section" id="config_section"></a> +## Service -The _Service_ section defines global properties of the service, the keys available as of this version are described in the following table: +The `Service` section defines global properties of the service. The following keys +are: -| Key | Description | Default Value | -| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | -| flush | Set the flush time in `seconds.nanoseconds`. The engine loop uses a Flush timeout to define when is required to flush the records ingested by input plugins through the defined output plugins. | 5 | -| grace | Set the grace time in `seconds` as Integer value. The engine loop uses a Grace timeout to define wait time on exit | 5 | -| daemon | Boolean value to set if Fluent Bit should run as a Daemon (background) or not. Allowed values are: yes, no, on and off. note: If you are using a Systemd based unit as the one we provide in our packages, do not turn on this option. | Off | -| dns.mode | Set the primary transport layer protocol used by the asynchronous DNS resolver which can be overridden on a per plugin basis | UDP | -| log_file | Absolute path for an optional log file. 
By default all logs are redirected to the standard error interface (stderr). | | -| log_level | Set the logging verbosity level. Allowed values are: off, error, warn, info, debug and trace. Values are accumulative, e.g: if 'debug' is set, it will include error, warning, info and debug. Note that _trace_ mode is only available if Fluent Bit was built with the _WITH\_TRACE_ option enabled. | info | -| parsers_file | Path for a `parsers` configuration file. Multiple Parsers_File entries can be defined within the section. | | -| plugins_file | Path for a `plugins` configuration file. A _plugins_ configuration file allows to define paths for external plugins, for an example [see here](https://github.com/fluent/fluent-bit/blob/master/conf/plugins.conf). | | -| streams_file | Path for the Stream Processor configuration file. To learn more about Stream Processing configuration go [here](../../../stream-processing/introduction.md). | | -| http_server | Enable built-in HTTP Server | Off | -| http_listen | Set listening interface for HTTP Server when it's enabled | 0.0.0.0 | -| http_port | Set TCP Port for the HTTP Server | 2020 | -| coro_stack_size | Set the coroutines stack size in bytes. The value must be greater than the page size of the running system. Don't set too small value (say 4096), or coroutine threads can overrun the stack buffer. Do not change the default value of this parameter unless you know what you are doing. | 24576 | -| scheduler.cap | Set a maximum retry time in second. The property is supported from v1.8.7. | 2000 | -| scheduler.base | Set a base of exponential backoff. The property is supported from v1.8.7. | 5 | -| json.convert_nan_to_null | If enabled, NaN is converted to null when fluent-bit converts msgpack to json. | false | -| sp.convert_from_str_to_num | If enabled, Stream processor converts from number string to number type. | true | +| Key | Description | Default Value | +| --------------- | ------------- | ------------- | +| `flush` | Set the flush time in `seconds.nanoseconds`. The engine loop uses a Flush timeout to define when it's required to flush the records ingested by input plugins through the defined output plugins. | `1` | +| `grace` | Set the grace time in `seconds` as an integer value. The engine loop uses a grace timeout to define wait time on exit. | `5` | +| daemon | Boolean. Determines whether Fluent Bit should run as a Daemon (background). Allowed values are: `yes`, `no`, `on`, and `off`. Don't enable when using a Systemd based unit, such as the one provided in Fluent Bit packages. | `Off` | +| `dns.mode` | Set the primary transport layer protocol used by the asynchronous DNS resolver. Can be overridden on a per plugin basis. | `UDP` | +| `log_file` | Absolute path for an optional log file. By default all logs are redirected to the standard error interface (stderr). | _none_ | +| `log_level` | Set the logging verbosity level. Allowed values are: `off`, `error`, `warn`, `info`, `debug`, and `trace`. Values are cumulative. If `debug` is set, it will include `error`, `warning`, `info`, and `debug`. Trace mode is only available if Fluent Bit was built with the _`WITH_TRACE`_ option enabled. | `info` | +| `parsers_file` | Path for a `parsers` configuration file. Multiple `Parsers_File` entries can be defined within the section. | _none_ | +| `plugins_file` | Path for a `plugins` configuration file. A `plugins` configuration file defines paths for external plugins. [See an example](https://github.com/fluent/fluent-bit/blob/master/conf/plugins.conf). 
| _none_ | +| `streams_file` | Path for the Stream Processor configuration file. [Learn more about Stream Processing configuration](../../../stream-processing/introduction.md). | _none_| +| `http_server` | Enable the built-in HTTP Server. | `Off` | +| `http_listen` | Set listening interface for HTTP Server when it's enabled. | `0.0.0.0` | +| `http_port` | Set TCP Port for the HTTP Server. | `2020` | +| `coro_stack_size` | Set the coroutines stack size in bytes. The value must be greater than the page size of the running system. Setting the value too small (`4096`) can cause coroutine threads to overrun the stack buffer. The default value of this parameter shouldn't be changed. | `24576` | +| `scheduler.cap` | Set a maximum retry time in seconds. Supported in v1.8.7 and greater. | `2000` | +| `scheduler.base` | Set a base of exponential backoff. Supported in v1.8.7 and greater. | `5` | +| `json.convert_nan_to_null` | If enabled, `NaN` converts to `null` when Fluent Bit converts `msgpack` to `json`. | `false` | +| `sp.convert_from_str_to_num` | If enabled, Stream processor converts from number string to number type. | `true` | -The following is an example of a _SERVICE_ section: +The following is an example of a `SERVICE` section: ```python [SERVICE] @@ -49,23 +53,28 @@ The following is an example of a _SERVICE_ section: Daemon off Log_Level debug ``` -For scheduler and retry details, please check there: [scheduling and retries](../../scheduling-and-retries.md#Scheduling-and-Retries) -## Input <a href="config_input" id="config_input"></a> +For scheduler and retry details, see [scheduling and retries](../../scheduling-and-retries.md#Scheduling-and-Retries). -An _INPUT_ section defines a source (related to an input plugin), here we will describe the base configuration for each _INPUT_ section. Note that each input plugin may add it own configuration keys: +## Config input -| Key | Description | -| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Name | Name of the input plugin. | -| Tag | Tag name associated to all records coming from this plugin. | -| Log_Level | Set the plugin's logging verbosity level. Allowed values are: off, error, warn, info, debug and trace. Defaults to the _SERVICE_ section's _Log_Level._ | +The `INPUT` section defines a source (related to an input plugin). Each +[input plugin](https://docs.fluentbit.io/manual/pipeline/inputs) can add its own +configuration keys: -The _Name_ is mandatory and it let Fluent Bit know which input plugin should be loaded. The _Tag_ is mandatory for all plugins except for the _input forward_ plugin (as it provides dynamic tags). +| Key | Description | +| ----------- | ------------| +| `Name` | Name of the input plugin. | +| `Tag` | Tag name associated to all records coming from this plugin. | +| `Log_Level` | Set the plugin's logging verbosity level. Allowed values are: `off`, `error`, `warn`, `info`, `debug`, and `trace`. Defaults to the `SERVICE` section's `Log_Level`. | + +`Name` is mandatory and tells Fluent Bit which input plugin to load. `Tag` is +mandatory for all plugins except for the `input forward` plugin, which provides +dynamic tags. 
### Example -The following is an example of an _INPUT_ section: +The following is an example of an `INPUT` section: ```python [INPUT] @@ -73,22 +82,26 @@ The following is an example of an _INPUT_ section: Tag my_cpu ``` -## Filter <a href="config_filter" id="config_filter"></a> +## Config filter -A _FILTER_ section defines a filter (related to an filter plugin), here we will describe the base configuration for each _FILTER_ section. Note that each filter plugin may add it own configuration keys: +The `FILTER` section defines a filter (related to an filter plugin). Each filter +plugin can add it own configuration keys. The base configuration for each +`FILTER` section contains: -| Key | Description | -| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Name | Name of the filter plugin. | -| Match | A pattern to match against the tags of incoming records. It's case sensitive and support the star (\*) character as a wildcard. | -| Match_Regex | A regular expression to match against the tags of incoming records. Use this option if you want to use the full regex syntax. | -| Log_Level | Set the plugin's logging verbosity level. Allowed values are: off, error, warn, info, debug and trace. Defaults to the _SERVICE_ section's _Log_Level._ | +| Key | Description | +| ----------- | ------------ | +| `Name` | Name of the filter plugin. | +| `Match` | A pattern to match against the tags of incoming records. Case sensitive, supports asterisk (`*`) as a wildcard. | +| `Match_Regex` | A regular expression to match against the tags of incoming records. Use this option if you want to use the full regular expression syntax. | +| `Log_Level` | Set the plugin's logging verbosity level. Allowed values are: `off`, `error`, `warn`, `info`, `debug`, and `trace`. Defaults to the `SERVICE` section's `Log_Level`. | -The _Name_ is mandatory and it let Fluent Bit know which filter plugin should be loaded. The _Match_ or _Match_Regex_ is mandatory for all plugins. If both are specified, _Match_Regex_ takes precedence. +`Name` is mandatory and lets Fluent Bit know which filter plugin should be loaded. +`Match` or `Match_Regex` is mandatory for all plugins. If both are specified, +`Match_Regex` takes precedence. -### Example +### Filter example -The following is an example of an _FILTER_ section: +The following is an example of a `FILTER` section: ```python [FILTER] @@ -97,20 +110,22 @@ The following is an example of an _FILTER_ section: Regex log aa ``` -## Output <a href="config_output" id="config_output"></a> +## Config output -The _OUTPUT_ section specify a destination that certain records should follow after a Tag match. Currently, Fluent Bit can route up to 256 _OUTPUT_ plugins. The configuration support the following keys: +The `OUTPUT` section specifies a destination that certain records should go to +after a `Tag` match. Fluent Bit can route up to 256 `OUTPUT` plugins. The +configuration supports the following keys: -| Key | Description | -| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Name | Name of the output plugin. | -| Match | A pattern to match against the tags of incoming records. It's case sensitive and support the star (\*) character as a wildcard. | -| Match_Regex | A regular expression to match against the tags of incoming records. 
Use this option if you want to use the full regex syntax. | -| Log_Level | Set the plugin's logging verbosity level. Allowed values are: off, error, warn, info, debug and trace. Defaults to the _SERVICE_ section's _Log_Level._ | +| Key | Description | +| ----------- | -------------- | +| `Name` | Name of the output plugin. | +| `Match` | A pattern to match against the tags of incoming records. Case sensitive and supports the asterisk (`*`) character as a wildcard. | +| `Match_Regex` | A regular expression to match against the tags of incoming records. Use this option if you want to use the full regular expression syntax. | +| `Log_Level` | Set the plugin's logging verbosity level. Allowed values are: `off`, `error`, `warn`, `info`, `debug`, and `trace`. Defaults to the `SERVICE` section's `Log_Level`. | -### Example +### Output example -The following is an example of an _OUTPUT_ section: +The following is an example of an `OUTPUT` section: ```python [OUTPUT] @@ -120,7 +135,8 @@ The following is an example of an _OUTPUT_ section: ### Example: collecting CPU metrics -The following configuration file example demonstrates how to collect CPU metrics and flush the results every five seconds to the standard output: +The following configuration file example demonstrates how to collect CPU metrics and +flush the results every five seconds to the standard output: ```python [SERVICE] @@ -137,35 +153,32 @@ The following configuration file example demonstrates how to collect CPU metrics Match my*cpu ``` -## Visualize <a href="config_include_file" id="config_include_file"></a> - -You can also visualize Fluent Bit INPUT, FILTER, and OUTPUT configuration via [Calyptia](https://calyptia.com/free-trial) - - +## Config Include File -## Include File <a href="config_include_file" id="config_include_file"></a> +To avoid complicated long configuration files is better to split specific parts in +different files and call them (include) from one main file. The `@INCLUDE` can be used +in the following way: -To avoid complicated long configuration files is better to split specific parts in different files and call them (include) from one main file. - -Starting from Fluent Bit 0.12 the new configuration command _@INCLUDE_ has been added and can be used in the following way: - -``` +```text @INCLUDE somefile.conf ``` -The configuration reader will try to open the path _somefile.conf_, if not found, it will assume it's a relative path based on the path of the base configuration file, e.g: +The configuration reader will try to open the path `somefile.conf`. If not found, the +reader assumes the file is on a relative path based on the path of the base +configuration file: -* Main configuration file path: /tmp/main.conf -* Included file: somefile.conf -* Fluent Bit will try to open somefile.conf, if it fails it will try /tmp/somefile.conf. +- Main configuration path: `/tmp/main.conf` +- Included file: `somefile.conf` +- Fluent Bit will try to open `somefile.conf`, if it fails it will try `/tmp/somefile.conf`. -The _@INCLUDE_ command only works at top-left level of the configuration line, it cannot be used inside sections. +The `@INCLUDE` command only works at top-left level of the configuration line, and +can't be used inside sections. -Wildcard character (\*) is supported to include multiple files, e.g: +Wildcard character (`*`) supports including multiple files. For example: -``` +```text @INCLUDE input_*.conf ``` -Note files matching the wildcard character are included unsorted. 
-If plugins ordering between files need to be preserved, the files should be included explicitly. +Files matching the wildcard character are included unsorted. If plugin ordering +between files needs to be preserved, the files should be included explicitly. diff --git a/administration/configuring-fluent-bit/classic-mode/variables.md b/administration/configuring-fluent-bit/classic-mode/variables.md index 11e32a56b..1344e4f4c 100644 --- a/administration/configuring-fluent-bit/classic-mode/variables.md +++ b/administration/configuring-fluent-bit/classic-mode/variables.md @@ -1,5 +1,7 @@ # Variables +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=1731c7b5-34c6-424f-bfc6-88c2aa71e81f" /> + Fluent Bit supports the usage of environment variables in any value associated to a key when using a configuration file. The variables are case sensitive and can be used in the following format: diff --git a/administration/configuring-fluent-bit/multiline-parsing.md b/administration/configuring-fluent-bit/multiline-parsing.md index f6fa1e379..d7965c9ee 100644 --- a/administration/configuring-fluent-bit/multiline-parsing.md +++ b/administration/configuring-fluent-bit/multiline-parsing.md @@ -2,6 +2,8 @@ In an ideal world, applications might log their messages within a single line, but in reality applications generate multiple log messages that sometimes belong to the same context. But when is time to process such information it gets really complex. Consider application stack traces which always have multiple log lines. +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=e19a4c14-a9e4-4163-8f3a-52196eb9a585" /> + Starting from Fluent Bit v1.8, we have implemented a unified Multiline core functionality to solve all the user corner cases. In this section, you will learn about the features and configuration options available. ## Concepts diff --git a/administration/configuring-fluent-bit/yaml/README.md b/administration/configuring-fluent-bit/yaml/README.md index ce6e7f8c4..a62354cf3 100644 --- a/administration/configuring-fluent-bit/yaml/README.md +++ b/administration/configuring-fluent-bit/yaml/README.md @@ -1,3 +1,44 @@ -# Fluent Bit YAML configuration +# Fluent Bit YAML Configuration -YAML configuration feature was introduced since FLuent Bit version 1.9 as experimental, and it is production ready since Fluent Bit 2.0. +## Before You Get Started + +Fluent Bit traditionally offered a `classic` configuration mode, a custom configuration format that we are gradually phasing out. While `classic` mode has served well for many years, it has several limitations. Its basic design only supports grouping sections with key-value pairs and lacks the ability to handle sub-sections or complex data structures like lists. + +YAML, now a mainstream configuration format, has become essential in a cloud ecosystem where everything is configured this way. To minimize friction and provide a more intuitive experience for creating data pipelines, we strongly encourage users to transition to YAML. The YAML format enables features, such as processors, that are not possible to configure in `classic` mode. + +As of Fluent Bit v3.2, you can configure everything in YAML. 
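+
+For example, the following sketch shows a complete data pipeline defined entirely in YAML. The `random` input and `stdout` output are used here only for illustration, and the values are examples:
+
+```yaml
+service:
+  flush: 1
+  log_level: info
+
+pipeline:
+  inputs:
+    # Generates sample records; swap in any input plugin you need.
+    - name: random
+
+  outputs:
+    # Prints every record to standard output.
+    - name: stdout
+      match: '*'
+```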
+ +## List of Available Sections + +Configuring Fluent Bit with YAML introduces the following root-level sections: + +| Section Name |Description | +|----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------| +| `service` | Describes the global configuration for the Fluent Bit service. This section is optional; if not set, default values will apply. Only one `service` section can be defined. | +| `parsers` | Lists parsers to be used by components like inputs, processors, filters, or output plugins. You can define multiple `parsers` sections, which can also be loaded from external files included in the main YAML configuration. | +| `multiline_parsers` | Lists multiline parsers, functioning similarly to `parsers`. Multiple definitions can exist either in the root or in included files. | +| `pipeline` | Defines a pipeline composed of inputs, processors, filters, and output plugins. You can define multiple `pipeline` sections, but they will not operate independently. Instead, all components will be merged into a single pipeline internally. | +| `plugins` | Specifies the path to external plugins (.so files) to be loaded by Fluent Bit at runtime. | +| `upstream_servers` | Refers to a group of node endpoints that can be referenced by output plugins that support this feature. | +| `env` | Sets a list of environment variables for Fluent Bit. Note that system environment variables are available, while the ones defined in the configuration apply only to Fluent Bit. | + +## Section Documentation + +To access detailed configuration guides for each section, use the following links: + +- [Service Section documentation](service-section.md) + - Overview of global settings, configuration options, and examples. +- [Parsers Section documentation](parsers-section.md) + - Detailed guide on defining parsers and supported formats. +- [Multiline Parsers Section documentation](multiline-parsers-section.md) + - Explanation of multiline parsing configuration. +- [Pipeline Section documentation](pipeline-section.md) + - Details on setting up pipelines and using processors. +- [Plugins Section documentation](plugins-section.md) + - How to load external plugins. +- [Upstream Servers Section documentation](upstream-servers-section.md) + - Guide on setting up and using upstream nodes with supported plugins. +- [Environment Variables Section documentation](environment-variables-section.md) + - Information on setting environment variables and their scope within Fluent Bit. +- [Includes Section documentation](includes-section.md) + - Description on how to include external YAML files. diff --git a/administration/configuring-fluent-bit/yaml/configuration-file.md b/administration/configuring-fluent-bit/yaml/configuration-file.md index 0205ccc5f..87e062799 100644 --- a/administration/configuring-fluent-bit/yaml/configuration-file.md +++ b/administration/configuring-fluent-bit/yaml/configuration-file.md @@ -2,6 +2,8 @@ description: This page describes the yaml configuration file used by Fluent Bit --- +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=864c6f0e-8977-4838-8772-84416943548e" /> + # YAML Configuration File One of the ways to configure Fluent Bit is using a YAML configuration file that works at a global scope. 
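+
+As a sketch of what a global-scope YAML file can contain, the example below combines several root-level sections (`service`, `parsers`, and `pipeline`) in a single file. The plugin choices, paths, and values are illustrative only:
+
+```yaml
+service:
+  flush: 1
+  log_level: info
+
+parsers:
+  - name: json
+    format: json
+
+pipeline:
+  inputs:
+    # Illustrative input; reads a log file and applies the json parser above.
+    - name: tail
+      path: /var/log/example.log
+      parser: json
+
+  outputs:
+    - name: stdout
+      match: '*'
+```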
diff --git a/administration/configuring-fluent-bit/yaml/environment-variables-section.md b/administration/configuring-fluent-bit/yaml/environment-variables-section.md new file mode 100644 index 000000000..7ca377ac2 --- /dev/null +++ b/administration/configuring-fluent-bit/yaml/environment-variables-section.md @@ -0,0 +1,61 @@ +# Environment Variables Section + +The `env` section allows you to define environment variables directly within the configuration file. These variables can then be used to dynamically replace values throughout your configuration using the `${VARIABLE_NAME}` syntax. + +Values set in the `env` section are case-sensitive. However, as a best practice, we recommend using uppercase names for environment variables. The example below defines two variables, `FLUSH_INTERVAL` and `STDOUT_FMT`, which can be accessed in the configuration using `${FLUSH_INTERVAL}` and `${STDOUT_FMT}`: + +```yaml +env: + FLUSH_INTERVAL: 1 + STDOUT_FMT: 'json_lines' + +service: + flush: ${FLUSH_INTERVAL} + log_level: info + +pipeline: + inputs: + - name: random + + outputs: + - name: stdout + match: '*' + format: ${STDOUT_FMT} +``` + +## Predefined Variables + +Fluent Bit provides a set of predefined environment variables that can be used in your configuration: + +| Name | Description | +|--|--| +| `${HOSTNAME}` | The system’s hostname. | + +## External Variables + +In addition to variables defined in the configuration file or the predefined ones, Fluent Bit can access system environment variables set in the user space. These external variables can be referenced in the configuration using the same ${VARIABLE_NAME} pattern. + +For example, to set the FLUSH_INTERVAL system environment variable to 2 and use it in your configuration: + +```bash +export FLUSH_INTERVAL=2 +``` + +In the configuration file, you can then access this value as follows: + +```yaml +service: + flush: ${FLUSH_INTERVAL} + log_level: info + +pipeline: + inputs: + - name: random + + outputs: + - name: stdout + match: '*' + format: json_lines +``` + +This approach allows you to easily manage and override configuration values using environment variables, providing flexibility in various deployment environments. diff --git a/administration/configuring-fluent-bit/yaml/includes-section.md b/administration/configuring-fluent-bit/yaml/includes-section.md new file mode 100644 index 000000000..c36e4b755 --- /dev/null +++ b/administration/configuring-fluent-bit/yaml/includes-section.md @@ -0,0 +1,32 @@ +# Includes Section + +The `includes` section allows you to specify additional YAML configuration files to be merged into the current configuration. These files are identified as a list of filenames and can include relative or absolute paths. If no absolute path is provided, the file is assumed to be located in a directory relative to the file that references it. + +This feature is useful for organizing complex configurations into smaller, manageable files and including them as needed. + +### Usage + +Below is an example demonstrating how to include additional YAML files using relative path references. This is the file system path structure + +``` +├── fluent-bit.yaml +├── inclusion-1.yaml +└── subdir + └── inclusion-2.yaml +``` + +The content of `fluent-bit.yaml` + +```yaml +includes: + - inclusion-1.yaml + - subdir/inclusion-2.yaml +``` + +## Key Points + +- Relative Paths: If a path is not specified as absolute, it will be treated as relative to the file that includes it. 
+ +- Organized Configurations: Using the includes section helps keep your configuration modular and easier to maintain. + +> note: Ensure that the included files are formatted correctly and contain valid YAML configurations for seamless integration. diff --git a/administration/configuring-fluent-bit/yaml/multiline-parsers-section.md b/administration/configuring-fluent-bit/yaml/multiline-parsers-section.md new file mode 100644 index 000000000..340fdea28 --- /dev/null +++ b/administration/configuring-fluent-bit/yaml/multiline-parsers-section.md @@ -0,0 +1,26 @@ +# Multiline Parsers + +Multiline parsers are used to combine logs that span multiple events into a single, cohesive message. This is particularly useful for handling stack traces, error logs, or any log entry that contains multiple lines of information. + +In YAML configuration, the syntax for defining multiline parsers differs slightly from the classic configuration format introducing minor breaking changes, specifically on how the rules are defined. + +Below is an example demonstrating how to define a multiline parser directly in the main configuration file, as well as how to include additional definitions from external files: + +```yaml +multiline_parsers: + - name: multiline-regex-test + type: regex + flush_timeout: 1000 + rules: + - state: start_state + regex: '/([a-zA-Z]+ \d+ \d+:\d+:\d+)(.*)/' + next_state: cont + - state: cont + regex: '/^\s+at.*/' + next_state: cont +``` + +The example above defines a multiline parser named `multiline-regex-test` that uses regular expressions to handle multi-event logs. The parser contains two rules: the first rule transitions from start_state to cont when a matching log entry is detected, and the second rule continues to match subsequent lines. + +For more detailed information on configuring multiline parsers, including advanced options and use cases, please refer to the Configuring Multiline Parsers section. + diff --git a/administration/configuring-fluent-bit/yaml/parsers-section.md b/administration/configuring-fluent-bit/yaml/parsers-section.md new file mode 100644 index 000000000..f34e17abd --- /dev/null +++ b/administration/configuring-fluent-bit/yaml/parsers-section.md @@ -0,0 +1,23 @@ +# Parsers Section + +Parsers enable Fluent Bit components to transform unstructured data into a structured internal representation. You can define parsers either directly in the main configuration file or in separate external files for better organization. + +This page provides a general overview of how to declare parsers. + +The main section name is `parsers`, and it allows you to define a list of parser configurations. The following example demonstrates how to set up two simple parsers: + +```yaml +parsers: + - name: json + format: json + + - name: docker + format: json + time_key: time + time_format: "%Y-%m-%dT%H:%M:%S.%L" + time_keep: true +``` + +You can define multiple parsers sections, either within the main configuration file or distributed across included files. + +For more detailed information on parser options and advanced configurations, please refer to the [Configuring Parsers](../../../pipeline/parsers/configuring-parser.md) section. 
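+
+As an illustration of distributing parsers across included files, the parsers can live in their own file and be pulled in through the `includes` section. The file names below are hypothetical and the configuration is only a sketch:
+
+```yaml
+# fluent-bit.yaml (hypothetical main configuration file)
+includes:
+  - parsers.yaml
+
+pipeline:
+  inputs:
+    # Uses the docker parser defined in the included file.
+    - name: tail
+      path: /var/log/example.log
+      parser: docker
+
+  outputs:
+    - name: stdout
+      match: '*'
+```
+
+```yaml
+# parsers.yaml (hypothetical included file)
+parsers:
+  - name: docker
+    format: json
+    time_key: time
+    time_format: "%Y-%m-%dT%H:%M:%S.%L"
+    time_keep: true
+```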
diff --git a/administration/configuring-fluent-bit/yaml/pipeline-section.md b/administration/configuring-fluent-bit/yaml/pipeline-section.md new file mode 100644 index 000000000..421a941bb --- /dev/null +++ b/administration/configuring-fluent-bit/yaml/pipeline-section.md @@ -0,0 +1,149 @@ +# Pipeline Section + +The `pipeline` section defines the flow of how data is collected, processed, and sent to its final destination. It encompasses the following core concepts: + +| Name | Description | +|---|---| +| `inputs` | Specifies the name of the plugin responsible for collecting or receiving data. This component serves as the data source in the pipeline. Examples of input plugins include `tail`, `http`, and `random`. | +| `processors` | **Unique to YAML configuration**, processors are specialized plugins that handle data processing directly attached to input plugins. Unlike filters, processors are not dependent on tag or matching rules. Instead, they work closely with the input to modify or enrich the data before it reaches the filtering or output stages. Processors are defined within an input plugin section. | +| `filters` | Filters are used to transform, enrich, or discard events based on specific criteria. They allow matching tags using strings or regular expressions, providing a more flexible way to manipulate data. Filters run as part of the main event loop and can be applied across multiple inputs and filters. Examples of filters include `modify`, `grep`, and `nest`. | +| `outputs` | Defines the destination for processed data. Outputs specify where the data will be sent, such as to a remote server, a file, or another service. Each output plugin is configured with matching rules to determine which events are sent to that destination. Common output plugins include `stdout`, `elasticsearch`, and `kafka`. | + +## Example Configuration + +Here’s a simple example of a pipeline configuration: + +```yaml +pipeline: + inputs: + - name: tail + path: /var/log/example.log + parser: json + + processors: + logs: + - name: record_modifier + filters: + - name: grep + match: '*' + regex: key pattern + + outputs: + - name: stdout + match: '*' +``` + +## Pipeline Processors + +Processors operate on specific signals such as logs, metrics, and traces. They are attached to an input plugin and must specify the signal type they will process. + +### Example of a Processor + +In the example below, the content_modifier processor inserts or updates (upserts) the key my_new_key with the value 123 for all log records generated by the tail plugin. 
This processor is only applied to log signals:

```yaml
parsers:
  - name: json
    format: json

pipeline:
  inputs:
    - name: tail
      path: /var/log/example.log
      parser: json
      processors:
        logs:
          - name: content_modifier
            action: upsert
            key: my_new_key
            value: 123
  filters:
    - name: grep
      match: '*'
      regex: key pattern

  outputs:
    - name: stdout
      match: '*'
```

Here is a more complete example with multiple processors:

```yaml
service:
  log_level: info
  http_server: on
  http_listen: 0.0.0.0
  http_port: 2021

pipeline:
  inputs:
    - name: random
      tag: test-tag
      interval_sec: 1
      processors:
        logs:
          - name: modify
            add: hostname monox
          - name: lua
            call: append_tag
            code: |
                function append_tag(tag, timestamp, record)
                   new_record = record
                   new_record["tag"] = tag
                   return 1, timestamp, new_record
                end

  outputs:
    - name: stdout
      match: '*'
      processors:
        logs:
          - name: lua
            call: add_field
            code: |
                function add_field(tag, timestamp, record)
                   new_record = record
                   new_record["output"] = "new data"
                   return 1, timestamp, new_record
                end
```

As this example shows, processors can be attached not only to inputs, but also to outputs.

### How Are Processors Different from Filters?

While processors and filters are similar in that they can transform, enrich, or drop data from the pipeline, there is a significant difference in how they operate:

- Processors: Run in the same thread as the input plugin when the input plugin is configured to be threaded (`threaded: true`). This design provides better performance, especially in multi-threaded setups.

- Filters: Run in the main event loop. When multiple filters are used, they can introduce performance overhead, particularly under heavy workloads.

## Running Filters as Processors

You can configure existing [Filters](https://docs.fluentbit.io/manual/pipeline/filters) to run as processors. There are no specific changes needed; you simply use the filter name as if it were a native processor.

### Example of a Filter Running as a Processor

In the example below, the `grep` filter is used as a processor to filter log events based on a pattern:

```yaml
parsers:
  - name: json
    format: json

pipeline:
  inputs:
    - name: tail
      path: /var/log/example.log
      parser: json
      processors:
        logs:
          - name: grep
            regex: log aa
  outputs:
    - name: stdout
      match: '*'
```
diff --git a/administration/configuring-fluent-bit/yaml/plugins-section.md b/administration/configuring-fluent-bit/yaml/plugins-section.md
new file mode 100644
index 000000000..c3df7be12
--- /dev/null
+++ b/administration/configuring-fluent-bit/yaml/plugins-section.md
@@ -0,0 +1,54 @@
# Plugins Section

While Fluent Bit comes with a variety of built-in plugins, it also supports loading external plugins at runtime. This feature is especially useful for loading Go or Wasm plugins that are built as shared object files (`.so`). Fluent Bit's YAML configuration provides two ways to load these external plugins:

## 1. Inline YAML Section

You can specify external plugins directly within your main YAML configuration file using the `plugins` section. Here's an example:

```yaml
plugins:
  - /path/to/out_gstdout.so

service:
  log_level: info

pipeline:
  inputs:
    - name: random

  outputs:
    - name: gstdout
      match: '*'
```

## 2. 
YAML Plugins File Included via plugins_file Option + +Alternatively, you can load external plugins from a separate YAML file by specifying the plugins_file option in the service section. Here’s how to configure this: + +```yaml +service: + log_level: info + plugins_file: extra_plugins.yaml + +pipeline: + inputs: + - name: random + + outputs: + - name: gstdout + match: '*' +``` + +In this setup, the `extra_plugins.yaml` file might contain the following plugins section: + +```yaml +plugins: + - /other/path/to/out_gstdout.so +``` + +### Key Points + +- Built-in vs. External: Fluent Bit comes with many built-in plugins, but you can load external plugins at runtime to extend the tool’s functionality. +- Loading Mechanism: External plugins must be shared object files (.so). You can define them inline in the main YAML configuration or include them from a separate YAML file for better modularity. + diff --git a/administration/configuring-fluent-bit/yaml/service-section.md b/administration/configuring-fluent-bit/yaml/service-section.md new file mode 100644 index 000000000..2dce19329 --- /dev/null +++ b/administration/configuring-fluent-bit/yaml/service-section.md @@ -0,0 +1,46 @@ +## Service Section + +The `service` section defines global properties of the service. The available configuration keys are: + +| Key | Description | Default | +|---|---|---| +| `flush` | Sets the flush time in `seconds.nanoseconds`. The engine loop uses a flush timeout to determine when to flush records ingested by input plugins to output plugins. | `1` | +| `grace` | Sets the grace time in `seconds` as an integer value. The engine loop uses a grace timeout to define the wait time before exiting. | `5` | +| `daemon` | Boolean. Specifies whether Fluent Bit should run as a daemon (background process). Allowed values are: `yes`, `no`, `on`, and `off`. Do not enable when using a Systemd-based unit, such as the one provided in Fluent Bit packages. | `off` | +| `dns.mode` | Sets the primary transport layer protocol used by the asynchronous DNS resolver. Can be overridden on a per-plugin basis. | `UDP` | +| `log_file` | Absolute path for an optional log file. By default, all logs are redirected to the standard error interface (stderr). | _none_ | +| `log_level` | Sets the logging verbosity level. Allowed values are: `off`, `error`, `warn`, `info`, `debug`, and `trace`. Values are cumulative. If `debug` is set, it will include `error`, `warn`, `info`, and `debug`. Trace mode is only available if Fluent Bit was built with the _`WITH_TRACE`_ option enabled. | `info` | +| `parsers_file` | Path for a `parsers` configuration file. Multiple `parsers_file` entries can be defined within the section. However, with the new YAML configuration schema, defining parsers using this key is now optional. Parsers can be declared directly in the `parsers` section of your YAML configuration, offering a more streamlined and integrated approach. | _none_ | +| `plugins_file` | Path for a `plugins` configuration file. This file specifies the paths to external plugins (.so files) that Fluent Bit can load at runtime. With the new YAML schema, the `plugins_file` key is optional. External plugins can now be referenced directly within the `plugins` section, simplifying the plugin management process. [See an example](https://github.com/fluent/fluent-bit/blob/master/conf/plugins.conf). | _none_ | +| `streams_file` | Path for the Stream Processor configuration file. This file defines the rules and operations for stream processing within Fluent Bit. 
The `streams_file` key is optional, as Stream Processor configurations can be defined directly in the `streams` section of the YAML schema. This flexibility allows for easier and more centralized configuration. [Learn more about Stream Processing configuration](../../../stream-processing/introduction.md). | _none_ | +| `http_server` | Enables the built-in HTTP Server. | `off` | +| `http_listen` | Sets the listening interface for the HTTP Server when it's enabled. | `0.0.0.0` | +| `http_port` | Sets the TCP port for the HTTP Server. | `2020` | +| `hot_reload` | Enables hot [reloading](../../hot_reload.md) of configuration with SIGHUP. | `on` | +| `coro_stack_size` | Sets the coroutine stack size in bytes. The value must be greater than the page size of the running system. Setting the value too small (`4096`) can cause coroutine threads to overrun the stack buffer. The default value of this parameter should not be changed. | `24576` | +| `scheduler.cap` | Sets a maximum retry time in seconds. Supported in v1.8.7 and greater. | `2000` | +| `scheduler.base` | Sets the base of exponential backoff. Supported in v1.8.7 and greater. | `5` | +| `json.convert_nan_to_null` | If enabled, `NaN` is converted to `null` when Fluent Bit converts `msgpack` to `json`. | `false` | +| `sp.convert_from_str_to_num` | If enabled, the Stream Processor converts strings that represent numbers to a numeric type. | `true` | + +### Configuration Example + +Below is a simple configuration example that defines a `service` section with [hot reloading](../../hot_reload.md) enabled and a pipeline with a `random` input and `stdout` output: + +```yaml +service: + flush: 1 + log_level: info + http_server: true + http_listen: 0.0.0.0 + http_port: 2020 + hot_reload: on + +pipeline: + inputs: + - name: random + + outputs: + - name: stdout + match: '*' +``` diff --git a/administration/configuring-fluent-bit/yaml/upstream-servers-section.md b/administration/configuring-fluent-bit/yaml/upstream-servers-section.md new file mode 100644 index 000000000..e9f13e00c --- /dev/null +++ b/administration/configuring-fluent-bit/yaml/upstream-servers-section.md @@ -0,0 +1,46 @@ +# Upstream Servers Section + +The `Upstream Servers` section defines a group of endpoints, referred to as nodes, which are used by output plugins to distribute data in a round-robin fashion. This is particularly useful for plugins that require load balancing when sending data. Examples of plugins that support this capability include [Forward](https://docs.fluentbit.io/manual/pipeline/outputs/forward) and [Elasticsearch](https://docs.fluentbit.io/manual/pipeline/outputs/elasticsearch). + +In YAML, this section is named `upstream_servers` and requires specifying a `name` for the group and a list of `nodes`. Below is an example that defines two upstream server groups: `forward-balancing` and `forward-balancing-2`: + +```yaml +upstream_servers: + - name: forward-balancing + nodes: + - name: node-1 + host: 127.0.0.1 + port: 43000 + + - name: node-2 + host: 127.0.0.1 + port: 44000 + + - name: node-3 + host: 127.0.0.1 + port: 45000 + tls: true + tls_verify: false + shared_key: secret + + - name: forward-balancing-2 + nodes: + - name: node-A + host: 192.168.1.10 + port: 50000 + + - name: node-B + host: 192.168.1.11 + port: 51000 +``` + +### Key Concepts + +- Nodes: Each node in the upstream_servers group must specify a name, host, and port. Additional settings like tls, tls_verify, and shared_key can be configured as needed for secure communication. 
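As a sketch of how a group might be consumed, the following example assumes the Forward output's `upstream` option is pointed at a separate file that holds the `upstream_servers` definition; the file name is hypothetical, and the exact wiring depends on the output plugin (see the usage note that follows).

```yaml
# upstream.yaml (hypothetical file) holding the upstream_servers definition
upstream_servers:
  - name: forward-balancing
    nodes:
      - name: node-1
        host: 127.0.0.1
        port: 43000
      - name: node-2
        host: 127.0.0.1
        port: 44000
```

```yaml
# Main configuration: the forward output distributes records across the
# nodes defined in the referenced upstream file
pipeline:
  outputs:
    - name: forward
      match: '*'
      upstream: upstream.yaml
```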
+ + +### Usage Note + +While the `upstream_servers` section can be defined globally, some output plugins may require the configuration to be specified in a separate YAML file. Be sure to consult the documentation for each specific output plugin to understand its requirements. + +For more details, refer to the documentation of the respective output plugins. diff --git a/administration/hot-reload.md b/administration/hot-reload.md index 5a60b21c6..730928185 100644 --- a/administration/hot-reload.md +++ b/administration/hot-reload.md @@ -2,15 +2,21 @@ description: Enable hot reload through SIGHUP signal or an HTTP endpoint --- -# Hot Reload +# Hot reload -Fluent Bit supports the hot reloading feature when enabled via the configuration file or command line with `-Y` or `--enable-hot-reload` option. +Fluent Bit supports the reloading feature when enabled in the configuration file +or on the command line with `-Y` or `--enable-hot-reload` option. -## Getting Started +Hot reloading is supported on Linux, macOS, and Windows operating systems. -To get started with reloading via HTTP, the first step is to enable the HTTP Server from the configuration file: +## Update the configuration -```toml +To get started with reloading over HTTP, enable the HTTP Server +in the configuration file: + +{% tabs %} +{% tab title="fluent-bit.conf" %} +```text [SERVICE] HTTP_Server On HTTP_Listen 0.0.0.0 @@ -18,48 +24,57 @@ To get started with reloading via HTTP, the first step is to enable the HTTP Ser Hot_Reload On ... ``` - -The above configuration snippet will enable the HTTP endpoint for hot reloading. +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +service: + http_server: on + http_listen: 0.0.0.0 + http_port: 2020 + hot_reload: on +``` +{% endtab %} +{% endtabs %} ## How to reload -### Via HTTP +After updating the configuration, use one of the following methods to perform a +hot reload: -Hot reloading can be kicked via HTTP endpoints that are: +### HTTP -* `PUT /api/v2/reload` -* `POST /api/v2/reload` +Use the following HTTP endpoints to perform a hot reload: -If users don't enable the hot reloading feature, hot reloading via these endpoints will not work. +- `PUT /api/v2/reload` +- `POST /api/v2/reload` For using curl to reload Fluent Bit, users must specify an empty request body as: ```text -$ curl -X POST -d '{}' localhost:2020/api/v2/reload +curl -X POST -d '{}' localhost:2020/api/v2/reload ``` -### Via Signal +### Signal -Hot reloading also can be kicked via `SIGHUP`. +Hot reloading can be used with `SIGHUP`. -`SIGHUP` signal is not supported on Windows. So, users can't enable this feature on Windows. +`SIGHUP` signal isn't supported on Windows. -## How to confirm reloaded or not +## Confirm a reload -### via HTTP +Use one of the following methods to confirm the reload occurred. -The number of hot reloaded count can be obtained via the HTTP endpoint that is: +### HTTP -* `GET /api/v2/reload` +Obtain a count of hot reload using the HTTP endpoint: -The endpoint returns the count of hot-reloaded as follows: +- `GET /api/v2/reload` + +The endpoint returns `hot_reload_count` as follows: ```json {"hot_reload_count":3} ``` -The default value of that number is 0. - -## Limitations - -The hot reloading feature is currently working on Linux, macOS and Windows. +The default value of the counter is `0`. 
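As an end-to-end sketch on Linux (assuming a single local `fluent-bit` process and the HTTP server enabled on port `2020`), you can trigger a reload with `SIGHUP` and then check the counter:

```shell
# Trigger a hot reload by sending SIGHUP to the running process
kill -HUP "$(pidof fluent-bit)"

# Confirm that the reload counter increased
curl -s http://127.0.0.1:2020/api/v2/reload
```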
diff --git a/administration/http-proxy.md b/administration/http-proxy.md index 840aca9ac..ee28e6d51 100644 --- a/administration/http-proxy.md +++ b/administration/http-proxy.md @@ -1,63 +1,81 @@ --- -description: Enable traffic through a proxy server via HTTP_PROXY environment variable +description: Enable traffic through a proxy server using the HTTP_PROXY environment variable. --- # HTTP Proxy -Fluent Bit supports configuring an HTTP proxy for all egress HTTP/HTTPS traffic via the `HTTP_PROXY` or `http_proxy` environment variable. +Fluent Bit supports configuring an HTTP proxy for all egress HTTP/HTTPS traffic +using the `HTTP_PROXY` or `http_proxy` environment variable. The format for the HTTP proxy environment variable is `http://USER:PASS@HOST:PORT`, where: -* `USER` is the username when using basic authentication. -* `PASS` is the password when using basic authentication. -* `HOST` is the HTTP proxy hostname or IP address. -* `PORT` is the port the HTTP proxy is listening on. +- _`USER`_ is the username when using basic authentication. +- _`PASS`_ is the password when using basic authentication. +- _`HOST`_ is the HTTP proxy hostname or IP address. +- _`PORT`_ is the port the HTTP proxy is listening on. To use an HTTP proxy with basic authentication, provide the username and password: -```bash +```text HTTP_PROXY='http://example_user:example_pass@proxy.example.com:8080' ``` When no authentication is required, omit the username and password: -```bash +```text HTTP_PROXY='http://proxy.example.com:8080' ``` -The `HTTP_PROXY` environment variable is a [standard way](https://docs.docker.com/network/proxy/#use-environment-variables) for setting a HTTP proxy in a containerized environment, and it is also natively supported by any application written in Go. Therefore, we follow and implement the same convention for Fluent Bit. For convenience and compatibility, the `http_proxy` environment variable is also supported. When both the `HTTP_PROXY` and `http_proxy` environment variables are provided, `HTTP_PROXY` will be preferred. +The `HTTP_PROXY` environment variable is a [standard +way](https://docs.docker.com/network/proxy/#use-environment-variables) of setting a +HTTP proxy in a containerized environment, and it's also natively supported by any +application written in Go. Fluent Bit implements the same convention. The +`http_proxy` environment variable is also supported. When both the `HTTP_PROXY` and +`http_proxy` environment variables are provided, `HTTP_PROXY` will be preferred. {% hint style="info" %} -**Note**: The [HTTP output plugin](https://docs.fluentbit.io/manual/pipeline/outputs/http) also supports configuring an HTTP proxy. This configuration continues to work, however it _should not_ be used together with the `HTTP_PROXY` or `http_proxy` environment variable. This is because under the hood, the environment variable based proxy configuration is implemented by setting up a TCP connection tunnel via [HTTP CONNECT](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/CONNECT). Unlike the plugin's implementation, this supports both HTTP and HTTPS egress traffic. + +The [HTTP output plugin](https://docs.fluentbit.io/manual/pipeline/outputs/http) also +supports configuring an HTTP proxy. This configuration works, but shouldn't be used +with the `HTTP_PROXY` or `http_proxy` environment variable. 
The environment
+variable-based proxy configuration is implemented by creating a TCP connection tunnel
+using
+[HTTP CONNECT](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/CONNECT). Unlike
+the plugin's implementation, this supports both HTTP and HTTPS egress traffic.
+
 {% endhint %}

-# NO_PROXY
+## `NO_PROXY`

-Not all traffic should flow through the HTTP proxy. In this case, the `NO_PROXY` or `no_proxy` environment variable should be used.
+Use the `NO_PROXY` environment variable when traffic shouldn't flow through the HTTP
+proxy. The `no_proxy` environment variable is also supported. When both `NO_PROXY`
+and `no_proxy` environment variables are provided, `NO_PROXY` takes precedence.

-The format for the no proxy environment variable is a comma-separated list of hostnames or IP addresses whose traffic should not flow through the HTTP proxy.
+The format for the `no_proxy` environment variable is a comma-separated list of
+host names or IP addresses.

-A domain name matches itself and all its subdomains (i.e. `foo.com` matches `foo.com` and `bar.foo.com`):
+A domain name matches itself and all of its subdomains (for example, `foo.com`
+matches both `foo.com` and `bar.foo.com`):

-```bash
+```text
 NO_PROXY='foo.com,127.0.0.1,localhost'
 ```

-A domain with a leading `.` only matches its subdomains (i.e. `.foo.com` matches `bar.foo.com` but not `foo.com`):
+A domain with a leading dot (`.`) matches only its subdomains (for example,
+`.example.com` matches `test.example.com` but not `example.com`):

-```bash
-NO_PROXY='.foo.com,127.0.0.1,localhost'
+```text
+NO_PROXY='.example.com,127.0.0.1,localhost'
 ```

-One typical use case for `NO_PROXY` is when running Fluent Bit in a Kubernetes environment, where we want:
+As an example, you might use `NO_PROXY` when running Fluent Bit in a Kubernetes
+environment, where you want:

-* All real egress traffic to flow through an HTTP proxy.
-* All local Kubernetes traffic to not flow through the HTTP proxy.
+- All real egress traffic to flow through an HTTP proxy.
+- All local Kubernetes traffic to not flow through the HTTP proxy.

-In this case, we can set:
+In this case, set:

-```bash
+```text
 NO_PROXY='127.0.0.1,localhost,kubernetes.default.svc'
 ```
-
-For convenience and compatibility, the `no_proxy` environment variable is also supported. When both the `NO_PROXY` and `no_proxy` environment variables are provided, `NO_PROXY` will be preferred.
diff --git a/administration/memory-management.md b/administration/memory-management.md
index dd8d33f4c..5289bb6e1 100644
--- a/administration/memory-management.md
+++ b/administration/memory-management.md
@@ -1,28 +1,46 @@
-# Memory Management
+# Memory management

-In certain scenarios it would be ideal to estimate how much memory Fluent Bit could be using, this is very useful for containerized environments where memory limits are a must.
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=5cc3ce54-e910-4ebf-85f5-f02530b3e11b" />

-In order to that we will assume that the input plugins have set the **Mem\_Buf\_Limit** option \(you can learn more about it in the [Backpressure](backpressure.md) section\).
+You might need to estimate how much memory Fluent Bit could be using in scenarios
+like containerized environments where memory limits are essential.
+
+To make an estimate, in-use input plugins must set the `Mem_Buf_Limit` option.
+Learn more about it in [Backpressure](backpressure.md).
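For reference, a minimal sketch of an input with a memory limit applied, in classic configuration format (the path and the `10MB` value are illustrative):

```text
[INPUT]
    # Cap in-memory buffering for this input; see Backpressure for the
    # behavior when the limit is reached.
    Name          tail
    Path          /var/log/example.log
    Mem_Buf_Limit 10MB
```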
## Estimating -Input plugins append data independently, so in order to do an estimation, a limit should be imposed through the **Mem\_Buf\_Limit** option. If the limit was set to _10MB_ we need to estimate that in the worse case, the output plugin likely could use _20MB_. +Input plugins append data independently. To make an estimation, impose a limit with +the `Mem_Buf_Limit` option. If the limit was set to `10MB`, you can estimate that in +the worst case, the output plugin likely could use `20MB`. -Fluent Bit has an internal binary representation for the data being processed, but when this data reaches an output plugin, it will likely create its own representation in a new memory buffer for processing. -The best examples are the [InfluxDB](../pipeline/outputs/influxdb.md) and [Elasticsearch](../pipeline/outputs/elasticsearch.md) output plugins, both need to convert the binary representation to their respective custom JSON formats before it can be sent to the backend servers. +Fluent Bit has an internal binary representation for the data being processed. When +this data reaches an output plugin, it can create its own representation in a new +memory buffer for processing. The best examples are the +[InfluxDB](../pipeline/outputs/influxdb.md) and +[Elasticsearch](../pipeline/outputs/elasticsearch.md) output plugins, which need to +convert the binary representation to their respective custom JSON formats before +sending data to the backend servers. -So, if we impose a limit of _10MB_ for the input plugins and consider the worse case scenario of the output plugin consuming _20MB_ extra, as a minimum we need \(_30MB_ x 1.2\) = **36MB**. +When imposing a limit of `10MB` for the input plugins, and a worst case scenario of +the output plugin consuming `20MB`, you need to allocate a minimum (`30MB` x 1.2) = +`36MB`. -## Glibc and Memory Fragmentation +## Glibc and memory fragmentation -It is well known that in intensive environments where memory allocations happen in the orders of magnitude, the default memory allocator provided by Glibc could lead to high fragmentation, reporting a high memory usage by the service. +In intensive environments where memory allocations happen in the orders of magnitude, +the default memory allocator provided by Glibc could lead to high fragmentation, +reporting a high memory usage by the service. -It's strongly suggested that in any production environment, Fluent Bit should be built with [jemalloc](http://jemalloc.net/) enabled \(e.g. `-DFLB_JEMALLOC=On`\). Jemalloc is an alternative memory allocator that can reduce fragmentation \(among others things\) resulting in better performance. +It's strongly suggested that in any production environment, Fluent Bit should be +built with [jemalloc](http://jemalloc.net/) enabled (`-DFLB_JEMALLOC=On`). +The jemalloc implementation of malloc is an alternative memory allocator that can +reduce fragmentation, resulting in better performance. -You can check if Fluent Bit has been built with Jemalloc using the following command: +Use the following command to determine if Fluent Bit has been built with jemalloc: -```text -$ bin/fluent-bit -h | grep JEMALLOC +```bash +bin/fluent-bit -h | grep JEMALLOC ``` The output should look like: @@ -34,5 +52,4 @@ FLB_HAVE_PROXY_GO FLB_HAVE_JEMALLOC JEMALLOC_MANGLE FLB_HAVE_REGEX FLB_HAVE_C_TLS FLB_HAVE_SETJMP FLB_HAVE_ACCEPT4 FLB_HAVE_INOTIFY ``` -If the FLB\_HAVE\_JEMALLOC option is listed in _Build Flags_, everything will be fine. 
- +If the `FLB_HAVE_JEMALLOC` option is listed in `Build Flags`, jemalloc is enabled. diff --git a/administration/monitoring.md b/administration/monitoring.md index 59ea80531..1b0c132cc 100644 --- a/administration/monitoring.md +++ b/administration/monitoring.md @@ -1,27 +1,35 @@ --- +title: Monitor data pipelines description: Learn how to monitor your Fluent Bit data pipelines --- -# Monitoring +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=e9ca51eb-7faf-491d-a62e-618a21c94506" /> -Fluent Bit comes with built-it features to allow you to monitor the internals of your pipeline, connect to Prometheus and Grafana, Health checks and also connectors to use external services for such purposes: +# Monitor data pipelines -* [HTTP Server: JSON and Prometheus Exporter-style metrics](monitoring.md#http-server) -* [Grafana Dashboards and Alerts](monitoring.md#grafana-dashboard-and-alerts) -* [Health Checks](monitoring.md#health-check-for-fluent-bit) -* [Calyptia Cloud: hosted service to monitor and visualize your pipelines](monitoring.md#calyptia-cloud) +Fluent Bit includes features for monitoring the internals of your pipeline, in +addition to connecting to Prometheus and Grafana, Health checks, and connectors to +use external services: -## HTTP Server +- [HTTP Server: JSON and Prometheus Exporter-style metrics](monitoring.md#http-server) +- [Grafana Dashboards and Alerts](monitoring.md#grafana-dashboard-and-alerts) +- [Health Checks](monitoring.md#health-check-for-fluent-bit) +- [Telemetry Pipeline: hosted service to monitor and visualize your pipelines](monitoring.md#telemetry-pipeline) -Fluent Bit comes with a built-in HTTP Server that can be used to query internal information and monitor metrics of each running plugin. +## HTTP server -The monitoring interface can be easily integrated with Prometheus since we support it native format. +Fluent Bit includes an HTTP server for querying internal information and monitoring +metrics of each running plugin. -### Getting Started +You can integrate the monitoring interface with Prometheus. -To get started, the first step is to enable the HTTP Server from the configuration file: +### Getting started -``` +To get started, enable the HTTP server from the configuration file. The following +configuration instructs Fluent Bit to start an HTTP server on TCP port `2020` and +listen on all network interfaces: + +```yaml [SERVICE] HTTP_Server On HTTP_Listen 0.0.0.0 @@ -35,10 +43,15 @@ To get started, the first step is to enable the HTTP Server from the configurati Match * ``` -the above configuration snippet will instruct Fluent Bit to start it HTTP Server on TCP Port 2020 and listening on all network interfaces: +Apply the configuration file: +```shell +bin/fluent-bit -c fluent-bit.conf ``` -$ bin/fluent-bit -c fluent-bit.conf + +Fluent Bit starts and generates output in your terminal: + +```shell Fluent Bit v1.4.0 * Copyright (C) 2019-2020 The Fluent Bit Authors * Copyright (C) 2015-2018 Treasure Data @@ -49,10 +62,12 @@ Fluent Bit v1.4.0 [2020/03/10 19:08:24] [ info] [http_server] listen iface=0.0.0.0 tcp_port=2020 ``` -now with a simple **curl** command is enough to gather some information: +Use `curl` to gather information about the HTTP server. The following command sends +the command output to the `jq` program, which outputs human-readable JSON data to the +terminal. 
-``` -$ curl -s http://127.0.0.1:2020 | jq +```curl +curl -s http://127.0.0.1:2020 | jq { "fluent-bit": { "version": "0.13.0", @@ -78,132 +93,154 @@ $ curl -s http://127.0.0.1:2020 | jq } ``` -Note that we are sending the _curl_ command output to the _jq_ program which helps to make the JSON data easy to read from the terminal. Fluent Bit don't aim to do JSON pretty-printing. - -### REST API Interface - -Fluent Bit aims to expose useful interfaces for monitoring, as of Fluent Bit v0.14 the following end points are available: - -| URI | Description | Data Format | -| -------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------- | -| / | Fluent Bit build information | JSON | -| /api/v1/uptime | Get uptime information in seconds and human readable format | JSON | -| /api/v1/metrics | Internal metrics per loaded plugin | JSON | -| /api/v1/metrics/prometheus | Internal metrics per loaded plugin ready to be consumed by a Prometheus Server | Prometheus Text 0.0.4 | -| /api/v1/storage | Get internal metrics of the storage layer / buffered data. This option is enabled only if in the `SERVICE` section the property `storage.metrics` has been enabled | JSON | -| /api/v1/health | Fluent Bit health check result | String | -| /api/v2/metrics | Internal metrics per loaded plugin | [cmetrics text format](https://github.com/fluent/cmetrics) | -| /api/v2/metrics/prometheus | Internal metrics per loaded plugin ready to be consumed by a Prometheus Server | Prometheus Text 0.0.4 | -| /api/v2/reload | Execute hot reloading or get the status of hot reloading. For more details, please refer to the [hot-reloading documentation](hot-reload.md). | JSON | - -### Metric Descriptions - -#### For v1 metrics - -The following are detailed descriptions for the metrics outputted in prometheus format by `/api/v1/metrics/prometheus`. - -The following definitions are key to understand: -* record: a single message collected from a source, such as a single long line in a file. -* chunk: Fluent Bit input plugin instances ingest log records and store them in chunks. A batch of records in a chunk are tracked together as a single unit; the Fluent Bit engine attempts to fit records into chunks of at most 2 MB, but the size can vary at runtime. Chunks are then sent to an output. An output plugin instance can either successfully send the full chunk to the destination and mark it as successful, or it can fail the chunk entirely if an unrecoverable error is encountered, or it can ask for the chunk to be retried. 
- -| Metric Name | Labels | Description | Type | Unit | -|----------------------------------------|-------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|---------| -| fluentbit_input_bytes_total | name: the name or alias for the input instance | The number of bytes of log records that this input instance has successfully ingested | counter | bytes | -| fluentbit_input_records_total | name: the name or alias for the input instance | The number of log records this input has successfully ingested | counter | records | -| fluentbit_output_dropped_records_total | name: the name or alias for the output instance | The number of log records that have been dropped by the output. This means they met an unrecoverable error or retries expired for their chunk. | counter | records | -| fluentbit_output_errors_total | name: the name or alias for the output instance | The number of chunks that have faced an error (either unrecoverable or retriable). This is the number of times a chunk has failed, and does not correspond with the number of error messages you see in the Fluent Bit log output. | counter | chunks | -| fluentbit_output_proc_bytes_total | name: the name or alias for the output instance | The number of bytes of log records that this output instance has *successfully* sent. This is the total byte size of all unique chunks sent by this output. If a record is not sent due to some error, then it will not count towards this metric. | counter | bytes | -| fluentbit_output_proc_records_total | name: the name or alias for the output instance | The number of log records that this output instance has *successfully* sent. This is the total record count of all unique chunks sent by this output. If a record is not successfully sent, it does not count towards this metric. | counter | records | -| fluentbit_output_retried_records_total | name: the name or alias for the output instance | The number of log records that experienced a retry. Note that this is calculated at the chunk level, the count increased when an entire chunk is marked for retry. An output plugin may or may not perform multiple actions that generate many error messages when uploading a single chunk. | counter | records | -| fluentbit_output_retries_failed_total | name: the name or alias for the output instance | The number of times that retries expired for a chunk. Each plugin configures a Retry_Limit which applies to chunks. Once the Retry_Limit has been reached for a chunk it is discarded and this metric is incremented. | counter | chunks | -| fluentbit_output_retries_total | name: the name or alias for the output instance | The number of times this output instance requested a retry for a chunk. | counter | chunks | -| fluentbit_uptime | | The number of seconds that Fluent Bit has been running. | counter | seconds | -| process_start_time_seconds | | The Unix Epoch time stamp for when Fluent Bit started.. | guage | seconds | - - -The following are detailed descriptions for the metrics outputted in JSON format by `/api/v1/storage`. 
- - -| Metric Key | Description | Unit | -|---------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------| -| chunks.total_chunks | The total number of chunks of records that Fluent Bit is currently buffering | chunks | -| chunks.mem_chunks | The total number of chunks that are buffered in memory at this time. Note that chunks can be both in memory and on the file system at the same time. | chunks | -| chunks.fs_chunks | The total number of chunks saved to the filesystem. | chunks | -| chunks.fs_chunks_up | A chunk is "up" if it is in memory. So this is the count of chunks that are both in filesystem and in memory. | chunks | -| chunks.fs_chunks_down | The count of chunks that are "down" and thus are only in the filesystem. | chunks | -| | | | -| input_chunks.{plugin name}.status.overlimit | Is this input instance over its configured Mem_Buf_Limit? | boolean | -| input_chunks.{plugin name}.status.mem_size | The size of memory that this input is consuming to buffer logs in chunks. | bytes | -| input_chunks.{plugin name}.status.mem_limit | The buffer memory limit (Mem_Buf_Limit) that applies to this input plugin. | bytes | -| | | | -| input_chunks.{plugin name}.chunks.total | The current total number of chunks owned by this input instance. | chunks | -| input_chunks.{plugin name}.chunks.up | The current number of chunks that are "up" in memory for this input. Chunks that are "up" will also be in the filesystem layer as well if filesystem storage is enabled. | chunks | -| input_chunks.{plugin name}.chunks.down | The current number of chunks that are "down" in the filesystem for this input. | chunks | -| input_chunks.{plugin name}.chunks.busy | "Busy" chunks are chunks that are being processed/sent by outputs and are not eligible to have new data appended. | chunks | -| input_chunks.{plugin name}.chunks.busy_size | The sum of the byte size of each chunk which is currently marked as busy. | bytes | - -#### For v2 metrics - -The following are detailed descriptions for the metrics outputted in prometheus format by `/api/v2/metrics/prometheus` or `/api/v2/metrics`. - -The following definitions are key to understand: -* record: a single message collected from a source, such as a single long line in a file. -* chunk: Fluent Bit input plugin instances ingest log records and store them in chunks. A batch of records in a chunk are tracked together as a single unit; the Fluent Bit engine attempts to fit records into chunks of at most 2 MB, but the size can vary at runtime. Chunks are then sent to an output. An output plugin instance can either successfully send the full chunk to the destination and mark it as successful, or it can fail the chunk entirely if an unrecoverable error is encountered, or it can ask for the chunk to be retried. 
- -| Metric Name | Labels | Description | Type | Unit | -|--------------------------------------------|-------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|---------| -| fluentbit\_input\_bytes\_total | name: the name or alias for the input instance | The number of bytes of log records that this input instance has successfully ingested | counter | bytes | -| fluentbit\_input\_records\_total | name: the name or alias for the input instance | The number of log records this input has successfully ingested | counter | records | -| fluentbit\_filter\_bytes\_total | name: the name or alias for the filter instance | The number of bytes of log records that this filter instance has successfully ingested | counter | bytes | -| fluentbit\_filter\_records\_total | name: the name or alias for the filter instance | The number of log records this filter has successfully ingested | counter | records | -| fluentbit\_filter\_added\_records\_total | name: the name or alias for the filter instance | The number of log records that have been added by the filter. This means they added into the data pipeline. | counter | records | -| fluentbit\_filter\_dropped\_records\_total | name: the name or alias for the filter instance | The number of log records that have been dropped by the filter. This means they removed from the data pipeline. | counter | records | -| fluentbit\_output\_dropped\_records\_total | name: the name or alias for the output instance | The number of log records that have been dropped by the output. This means they met an unrecoverable error or retries expired for their chunk. | counter | records | -| fluentbit\_output\_errors\_total | name: the name or alias for the output instance | The number of chunks that have faced an error (either unrecoverable or retriable). This is the number of times a chunk has failed, and does not correspond with the number of error messages you see in the Fluent Bit log output. | counter | chunks | -| fluentbit\_output\_proc\_bytes\_total | name: the name or alias for the output instance | The number of bytes of log records that this output instance has *successfully* sent. This is the total byte size of all unique chunks sent by this output. If a record is not sent due to some error, then it will not count towards this metric. | counter | bytes | -| fluentbit\_output\_proc\_records\_total | name: the name or alias for the output instance | The number of log records that this output instance has *successfully* sent. This is the total record count of all unique chunks sent by this output. If a record is not successfully sent, it does not count towards this metric. | counter | records | -| fluentbit\_output\_retried\_records\_total | name: the name or alias for the output instance | The number of log records that experienced a retry. Note that this is calculated at the chunk level, the count increased when an entire chunk is marked for retry. An output plugin may or may not perform multiple actions that generate many error messages when uploading a single chunk. | counter | records | -| fluentbit\_output\_retries\_failed\_total | name: the name or alias for the output instance | The number of times that retries expired for a chunk. 
Each plugin configures a Retry\_Limit which applies to chunks. Once the Retry\_Limit has been reached for a chunk it is discarded and this metric is incremented. | counter | chunks | -| fluentbit\_output\_retries\_total | name: the name or alias for the output instance | The number of times this output instance requested a retry for a chunk. | counter | chunks | -| fluentbit\_uptime | hostname: the hostname on running fluent-bit | The number of seconds that Fluent Bit has been running. | counter | seconds | -| fluentbit\_process\_start\_time\_seconds | hostname: the hostname on running fluent-bit | The Unix Epoch time stamp for when Fluent Bit started. | gauge | seconds | -| fluentbit\_build\_info | hostname: the hostname, version: the version of fluent-bit, os: OS type | Build version information. The returned value is originated from initializing the Unix Epoch time stamp of config context. | gauge | seconds | -| fluentbit\_hot\_reloaded\_times | hostname: the hostname on running fluent-bit | Collect the count of hot reloaded times. | gauge | seconds | - -The following are detailed descriptions for the metrics which is collected by storage layer. - - -| Metric Name | Labels | Description | Type | Unit | -|-------------------------------------------------|-------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------|---------| -| fluentbit\_input\_chunks.storage\_chunks | None | The total number of chunks of records that Fluent Bit is currently buffering | gauge | chunks | -| fluentbit\_storage\_mem\_chunk | None | The total number of chunks that are buffered in memory at this time. Note that chunks can be both in memory and on the file system at the same time. | gauge | chunks | -| fluentbit\_storage\_fs\_chunks | None | The total number of chunks saved to the filesystem. | gauge | chunks | -| fluentbit\_storage\_fs\_chunks\_up | None | A chunk is "up" if it is in memory. So this is the count of chunks that are both in filesystem and in memory. | gauge | chunks | -| fluentbit\_storage\_fs\_chunks\_down | None | The count of chunks that are "down" and thus are only in the filesystem. | gauge | chunks | -| fluentbit\_storage\_fs\_chunks\_busy | None | The total number of chunks are in a busy state. | gauge | chunks | -| fluentbit\_storage\_fs\_chunks\_busy\_bytes | None | The total bytes of chunks are in a busy state. | gauge | bytes | -| | | | | | -| fluentbit\_input\_storage\_overlimit | name: the name or alias for the input instance | Is this input instance over its configured Mem\_Buf\_Limit? | gauge | boolean | -| fluentbit\_input\_storage\_memory\_bytes | name: the name or alias for the input instance | The size of memory that this input is consuming to buffer logs in chunks. | gauge | bytes | -| | | | | | -| fluentbit\_input\_storage\_chunks | name: the name or alias for the input instance | The current total number of chunks owned by this input instance. | gauge | chunks | -| fluentbit\_input\_storage\_chunks\_up | name: the name or alias for the input instance | The current number of chunks that are "up" in memory for this input. Chunks that are "up" will also be in the filesystem layer as well if filesystem storage is enabled. 
| gauge | chunks | -| fluentbit\_input\_storage\_chunks\_down | name: the name or alias for the input instance | The current number of chunks that are "down" in the filesystem for this input. | gauge | chunks | -| fluentbit\_input\_storage\_chunks\_busy | name: the name or alias for the input instance | "Busy" chunks are chunks that are being processed/sent by outputs and are not eligible to have new data appended. | gauge | chunks | -| fluentbit\_input\_storage\_chunks\_busy\_bytes | name: the name or alias for the input instance | The sum of the byte size of each chunk which is currently marked as busy. | gauge | bytes | -| | | | | | -| fluentbit\_output\_upstream\_total\_connections | name: the name or alias for the output instance | The sum of the connection count of each output plugins. | gauge | bytes | -| fluentbit\_output\_upstream\_busy\_connections | name: the name or alias for the output instance | The sum of the connection count in a busy state of each output plugins. | gauge | bytes | - -### Uptime Example +### REST API interface + +Fluent Bit exposes the following endpoints for monitoring. + +| URI | Description | Data format | +| -------------------------- | ------------- | --------------------- | +| / | Fluent Bit build information. | JSON | +| /api/v1/uptime | Return uptime information in seconds. | JSON | +| /api/v1/metrics | Display internal metrics per loaded plugin. | JSON | +| /api/v1/metrics/prometheus | Display internal metrics per loaded plugin in Prometheus Server format. | Prometheus Text 0.0.4 | +| /api/v1/storage | Get internal metrics of the storage layer / buffered data. This option is enabled only if in the `SERVICE` section of the property `storage.metrics` is enabled. | JSON | +| /api/v1/health | Display the Fluent Bit health check result. | String | +| /api/v2/metrics | Display internal metrics per loaded plugin. | [cmetrics text format](https://github.com/fluent/cmetrics) | +| /api/v2/metrics/prometheus | Display internal metrics per loaded plugin ready in Prometheus Server format. | Prometheus Text 0.0.4 | +| /api/v2/reload | Execute hot reloading or get the status of hot reloading. See the [hot-reloading documentation](hot-reload.md). | JSON | + +### v1 metrics + +The following descriptions apply to v1 metric endpoints. + +#### `/api/v1/metrics/prometheus` endpoint + +The following descriptions apply to metrics outputted in Prometheus format by the +`/api/v1/metrics/prometheus` endpoint. + +The following terms are key to understanding how Fluent Bit processes metrics: + +- **Record**: a single message collected from a source, such as a single long line in + a file. +- **Chunk**: log records ingested and stored by Fluent Bit input plugin instances. A + batch of records in a chunk are tracked together as a single unit. + + The Fluent Bit engine attempts to fit records into chunks of at most `2 MB`, but + the size can vary at runtime. Chunks are then sent to an output. An output plugin + instance can either successfully send the full chunk to the destination and mark it + as successful, or it can fail the chunk entirely if an unrecoverable error is + encountered, or it can ask for the chunk to be retried. + +| Metric name | Labels | Description | Type | Unit | +|----------------------------------------|-------------------------------------------------|-------------|---------|---------| +| `fluentbit_input_bytes_total` | name: the name or alias for the input instance | The number of bytes of log records that this input instance has ingested successfully. 
| counter | bytes | +| `fluentbit_input_records_total` | name: the name or alias for the input instance | The number of log records this input ingested successfully. | counter | records | +| `fluentbit_output_dropped_records_total` | name: the name or alias for the output instance | The number of log records dropped by the output. These records hit an unrecoverable error or retries expired for their chunk. | counter | records | +| `fluentbit_output_errors_total` | name: the name or alias for the output instance | The number of chunks with an error that's either unrecoverable or unable to retry. This metric represents the number of times a chunk failed, and doesn't correspond with the number of error messages visible in the Fluent Bit log output. | counter | chunks | +| `fluentbit_output_proc_bytes_total` | name: the name or alias for the output instance | The number of bytes of log records that this output instance sent successfully. This metric represents the total byte size of all unique chunks sent by this output. If a record is not sent due to some error, it doesn't count towards this metric. | counter | bytes | +| `fluentbit_output_proc_records_total` | name: the name or alias for the output instance | The number of log records that this output instance sent successfully. This metric represents the total record count of all unique chunks sent by this output. If a record is not sent successfully, it doesn't count towards this metric. | counter | records | +| `fluentbit_output_retried_records_total` | name: the name or alias for the output instance | The number of log records that experienced a retry. This metric is calculated at the chunk level, the count increased when an entire chunk is marked for retry. An output plugin might perform multiple actions that generate many error messages when uploading a single chunk. | counter | records | +| `fluentbit_output_retries_failed_total` | name: the name or alias for the output instance | The number of times that retries expired for a chunk. Each plugin configures a `Retry_Limit`, which applies to chunks. When the `Retry_Limit` is exceeded, the chunk is discarded and this metric is incremented. | counter | chunks | +| `fluentbit_output_retries_total` | name: the name or alias for the output instance | The number of times this output instance requested a retry for a chunk. | counter | chunks | +| `fluentbit_uptime` | | The number of seconds that Fluent Bit has been running. | counter | seconds | +| `process_start_time_seconds` | | The Unix Epoch timestamp for when Fluent Bit started. | gauge | seconds | + +#### `/api/v1/storage` endpoint + +The following descriptions apply to metrics outputted in JSON format by the +`/api/v1/storage` endpoint. + +| Metric Key | Description | Unit | +|-----------------------------------------------|---------------|---------| +| `chunks.total_chunks` | The total number of chunks of records that Fluent Bit is currently buffering. | chunks | +| `chunks.mem_chunks` | The total number of chunks that are currently buffered in memory. Chunks can be both in memory and on the file system at the same time. | chunks | +| `chunks.fs_chunks` | The total number of chunks saved to the filesystem. | chunks | +| `chunks.fs_chunks_up` | The count of chunks that are both in file system and in memory. | chunks | +| `chunks.fs_chunks_down` | The count of chunks that are only in the file system. 
| chunks | +| `input_chunks.{plugin name}.status.overlimit` | Indicates whether the input instance exceeded its configured `Mem_Buf_Limit.` | boolean | +| `input_chunks.{plugin name}.status.mem_size` | The size of memory that this input is consuming to buffer logs in chunks. | bytes | +| `input_chunks.{plugin name}.status.mem_limit` | The buffer memory limit (`Mem_Buf_Limit`) that applies to this input plugin. | bytes | +| `input_chunks.{plugin name}.chunks.total` | The current total number of chunks owned by this input instance. | chunks | +| `input_chunks.{plugin name}.chunks.up` | The current number of chunks that are in memory for this input. If file system storage is enabled, chunks that are "up" are also stored in the filesystem layer. | chunks | +| `input_chunks.{plugin name}.chunks.down` | The current number of chunks that are "down" in the filesystem for this input. | chunks | +| `input_chunks.{plugin name}.chunks.busy` | Chunks are that are being processed or sent by outputs and are not eligible to have new data appended. | chunks | +| `input_chunks.{plugin name}.chunks.busy_size` | The sum of the byte size of each chunk which is currently marked as busy. | bytes | + +### v2 metrics + +The following descriptions apply to v2 metric endpoints. + +#### `/api/v2/metrics/prometheus` or `/api/v2/metrics` endpoint + +The following descriptions apply to metrics outputted in Prometheus format by the +`/api/v2/metrics/prometheus` or `/api/v2/metrics` endpoints. + +The following terms are key to understanding how Fluent Bit processes metrics: + +- **Record**: a single message collected from a source, such as a single long line in + a file. +- **Chunk**: log records ingested and stored by Fluent Bit input plugin instances. A + batch of records in a chunk are tracked together as a single unit. + + The Fluent Bit engine attempts to fit records into chunks of at most `2 MB`, but + the size can vary at runtime. Chunks are then sent to an output. An output plugin + instance can either successfully send the full chunk to the destination and mark it + as successful, or it can fail the chunk entirely if an unrecoverable error is + encountered, or it can ask for the chunk to be retried. + +| Metric Name | Labels | Description | Type | Unit | +|--------------------------------------------|-------------------------------------------------------------------------|-------------|---------|---------| +| `fluentbit_input_bytes_total` | name: the name or alias for the input instance | The number of bytes of log records that this input instance has ingested successfully. | counter | bytes | +| `fluentbit_input_records_total` | name: the name or alias for the input instance | The number of log records this input ingested successfully. | counter | records | +| `fluentbit_filter_bytes_total` | name: the name or alias for the filter instance | The number of bytes of log records that this filter instance has ingested successfully. | counter | bytes | +| `fluentbit_filter_records_total` | name: the name or alias for the filter instance | The number of log records this filter has ingested successfully. | counter | records | +| `fluentbit_filter_added_records_total` | name: the name or alias for the filter instance | The number of log records added by the filter into the data pipeline. | counter | records | +| `fluentbit_filter_drop_records_total` | name: the name or alias for the filter instance | The number of log records dropped by the filter and removed from the data pipeline. 
| counter | records | +| `fluentbit_output_dropped_records_total` | name: the name or alias for the output instance | The number of log records dropped by the output. These records hit an unrecoverable error or retries expired for their chunk. | counter | records | +| `fluentbit_output_errors_total` | name: the name or alias for the output instance | The number of chunks with an error that's either unrecoverable or unable to retry. This metric represents the number of times a chunk failed, and doesn't correspond with the number of error messages visible in the Fluent Bit log output. | counter | chunks | +| `fluentbit_output_proc_bytes_total` | name: the name or alias for the output instance | The number of bytes of log records that this output instance sent successfully. This metric represents the total byte size of all unique chunks sent by this output. If a record is not sent due to some error, it doesn't count towards this metric. | counter | bytes | +| `fluentbit_output_proc_records_total` | name: the name or alias for the output instance | The number of log records that this output instance sent successfully. This metric represents the total record count of all unique chunks sent by this output. If a record is not sent successfully, it doesn't count towards this metric. | counter | records | +| `fluentbit_output_retried_records_total` | name: the name or alias for the output instance | The number of log records that experienced a retry. This metric is calculated at the chunk level, the count increased when an entire chunk is marked for retry. An output plugin might perform multiple actions that generate many error messages when uploading a single chunk. | counter | records | +| `fluentbit_output_retries_failed_total` | name: the name or alias for the output instance | The number of times that retries expired for a chunk. Each plugin configures a `Retry_Limit`, which applies to chunks. When the `Retry_Limit` is exceeded, the chunk is discarded and this metric is incremented. | counter | chunks | +| `fluentbit_output_retries_total` | name: the name or alias for the output instance | The number of times this output instance requested a retry for a chunk. | counter | chunks | +| `fluentbit_uptime` | hostname: the hostname on running Fluent Bit | The number of seconds that Fluent Bit has been running. | counter | seconds | +| `fluentbit_process_start_time_seconds` | hostname: the hostname on running Fluent Bit | The Unix Epoch time stamp for when Fluent Bit started. | gauge | seconds | +| `fluentbit_build_info` | hostname: the hostname, version: the version of Fluent Bit, os: OS type | Build version information. The returned value is originated from initializing the Unix Epoch time stamp of configuration context. | gauge | seconds | +| `fluentbit_hot_reloaded_times` | hostname: the hostname on running Fluent Bit | Collect the count of hot reloaded times. | gauge | seconds | + +#### Storage layer + +The following are detailed descriptions for the metrics collected by the storage +layer. + +| Metric Name | Labels | Description | Type | Unit | +|---------------------------------------------|------------------------------|---------------|---------|---------| +| `fluentbit_input_chunks.storage_chunks` | None | The total number of chunks of records that Fluent Bit is currently buffering. | gauge | chunks | +| `fluentbit_storage_mem_chunk` | None | The total number of chunks that are currently buffered in memory. Chunks can be both in memory and on the file system at the same time. 
| gauge | chunks |
+| `fluentbit_storage_fs_chunks` | None | The total number of chunks saved to the file system. | gauge | chunks |
+| `fluentbit_storage_fs_chunks_up` | None | The count of chunks that are both in file system and in memory. | gauge | chunks |
+| `fluentbit_storage_fs_chunks_down` | None | The count of chunks that are only in the file system. | gauge | chunks |
+| `fluentbit_storage_fs_chunks_busy` | None | The total number of chunks that are in a busy state. | gauge | chunks |
+| `fluentbit_storage_fs_chunks_busy_bytes` | None | The total bytes of chunks that are in a busy state. | gauge | bytes |
+| `fluentbit_input_storage_overlimit` | name: the name or alias for the input instance | Indicates whether the input instance exceeded its configured `Mem_Buf_Limit`. | gauge | boolean |
+| `fluentbit_input_storage_memory_bytes` | name: the name or alias for the input instance | The size of memory that this input is consuming to buffer logs in chunks. | gauge | bytes |
+| `fluentbit_input_storage_chunks` | name: the name or alias for the input instance | The current total number of chunks owned by this input instance. | gauge | chunks |
+| `fluentbit_input_storage_chunks_up` | name: the name or alias for the input instance | The current number of chunks that are in memory for this input. If file system storage is enabled, chunks that are "up" are also stored in the filesystem layer. | gauge | chunks |
+| `fluentbit_input_storage_chunks_down` | name: the name or alias for the input instance | The current number of chunks that are "down" in the filesystem for this input. | gauge | chunks |
+| `fluentbit_input_storage_chunks_busy` | name: the name or alias for the input instance | Chunks that are being processed or sent by outputs and are not eligible to have new data appended. | gauge | chunks |
+| `fluentbit_input_storage_chunks_busy_bytes` | name: the name or alias for the input instance | The sum of the byte size of each chunk which is currently marked as busy. | gauge | bytes |
+| `fluentbit_output_upstream_total_connections` | name: the name or alias for the output instance | The sum of the connection count of each output plugin. | gauge | connections |
+| `fluentbit_output_upstream_busy_connections` | name: the name or alias for the output instance | The sum of the connection count in a busy state of each output plugin. 
| gauge | bytes | + +### Uptime example Query the service uptime with the following command: -``` +```curl $ curl -s http://127.0.0.1:2020/api/v1/uptime | jq ``` -it should print a similar output like this: +The command prints a similar output like this: ```javascript { @@ -212,7 +249,7 @@ it should print a similar output like this: } ``` -### Metrics Examples +### Metrics example Query internal metrics in JSON format with the following command: @@ -220,7 +257,7 @@ Query internal metrics in JSON format with the following command: $ curl -s http://127.0.0.1:2020/api/v1/metrics | jq ``` -it should print a similar output like this: +The command prints a similar output like this: ```javascript { @@ -242,7 +279,7 @@ it should print a similar output like this: } ``` -### Metrics in Prometheus format +### Query metrics in Prometheus format Query internal metrics in Prometheus Text 0.0.4 format: @@ -250,9 +287,9 @@ Query internal metrics in Prometheus Text 0.0.4 format: $ curl -s http://127.0.0.1:2020/api/v1/metrics/prometheus ``` -this time the same metrics will be in Prometheus format instead of JSON: +This command returns the same metrics in Prometheus format instead of JSON: -``` +```text fluentbit_input_records_total{name="cpu.0"} 57 1509150350542 fluentbit_input_bytes_total{name="cpu.0"} 18069 1509150350542 fluentbit_output_proc_records_total{name="stdout.0"} 54 1509150350542 @@ -262,13 +299,17 @@ fluentbit_output_retries_total{name="stdout.0"} 0 1509150350542 fluentbit_output_retries_failed_total{name="stdout.0"} 0 1509150350542 ``` -### Configuring Aliases +### Configure aliases -By default configured plugins on runtime get an internal name in the format _plugin_name.ID_. For monitoring purposes, this can be confusing if many plugins of the same type were configured. To make a distinction each configured input or output section can get an _alias_ that will be used as the parent name for the metric. +By default, configured plugins on runtime get an internal name in the format +`_plugin_name.ID_`. For monitoring purposes, this can be confusing if many plugins of +the same type were configured. To make a distinction each configured input or output +section can get an _alias_ that will be used as the parent name for the metric. -The following example set an alias to the INPUT section which is using the [CPU](../pipeline/inputs/cpu-metrics.md) input plugin: +The following example sets an alias to the `INPUT` section of the configuration file, +which is using the [CPU](../pipeline/inputs/cpu-metrics.md) input plugin: -``` +```yaml [SERVICE] HTTP_Server On HTTP_Listen 0.0.0.0 @@ -284,7 +325,8 @@ The following example set an alias to the INPUT section which is using the [CPU] Match * ``` -Now when querying the metrics we get the aliases in place instead of the plugin name: +When querying the related metrics, the aliases are returned instead of the plugin +name: ```javascript { @@ -306,13 +348,22 @@ Now when querying the metrics we get the aliases in place instead of the plugin } ``` -## Grafana Dashboard and Alerts +## Grafana dashboard and alerts + +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=0b83cb05-4f52-4853-83cc-f4539b64044d" /> -Fluent Bit's exposed [prometheus style metrics](https://docs.fluentbit.io/manual/administration/monitoring) can be leveraged to create dashboards and alerts. +You can create Grafana dashboards and alerts using Fluent Bit's exposed Prometheus +style metrics. 
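+
+As a starting point, the following minimal sketch shows how a Prometheus server
+might be configured to scrape these metrics, assuming Fluent Bit's HTTP server is
+reachable at `127.0.0.1:2020`:
+
+```yaml
+# Hypothetical Prometheus scrape configuration for a local Fluent Bit instance.
+scrape_configs:
+  - job_name: fluent-bit
+    metrics_path: /api/v2/metrics/prometheus
+    static_configs:
+      - targets: ['127.0.0.1:2020']
+```
+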
-The provided [example dashboard](https://github.com/fluent/fluent-bit-docs/tree/8172a24d278539a1420036a9434e9f56d987a040/monitoring/dashboard.json) is heavily inspired by [Banzai Cloud](https://banzaicloud.com)'s [logging operator dashboard](https://grafana.com/grafana/dashboards/7752) but with a few key differences such as the use of the `instance` label (see [why here](https://www.robustperception.io/controlling-the-instance-label)), stacked graphs and a focus on Fluent Bit metrics. +The provided [example dashboard](https://github.com/fluent/fluent-bit-docs/tree/8172a24d278539a1420036a9434e9f56d987a040/monitoring/dashboard.json) +is heavily inspired by [Banzai Cloud](https://banzaicloud.com)'s +[logging operator dashboard](https://grafana.com/grafana/dashboards/7752) with a few +key differences, such as the use of the `instance` label, stacked graphs, and a focus +on Fluent Bit metrics. See +[this blog post](https://www.robustperception.io/controlling-the-instance-label) +for more information. - + ### Alerts @@ -322,34 +373,43 @@ Sample alerts are available [here](https://github.com/fluent/fluent-bit-docs/tre Fluent bit now supports four new configs to set up the health check. -| Config Name | Description | Default Value | -| ---------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| ------------- | -| Health_Check | enable Health check feature | Off | -| HC_Errors_Count | the error count to meet the unhealthy requirement, this is a sum for all output plugins in a defined HC_Period, example for output error: ` [2022/02/16 10:44:10] [ warn] [engine] failed to flush chunk '1-1645008245.491540684.flb', retry in 7 seconds: task_id=0, input=forward.1 > output=cloudwatch_logs.3 (out_id=3)` | 5 | -| HC_Retry_Failure_Count | the retry failure count to meet the unhealthy requirement, this is a sum for all output plugins in a defined HC_Period, example for retry failure: `[2022/02/16 20:11:36] [ warn] [engine] chunk '1-1645042288.260516436.flb' cannot be retried: task_id=0, input=tcp.3 > output=cloudwatch_logs.1 ` | 5 | -| HC_Period | The time period by second to count the error and retry failure data point | 60 | +| Configuration name | Description | Default | +| ---------------------- | ------------| ------------- | +| `Health_Check` | enable Health check feature | Off | +| `HC_Errors_Count` | the error count to meet the unhealthy requirement, this is a sum for all output plugins in a defined HC_Period, example for output error: `[2022/02/16 10:44:10] [ warn] [engine] failed to flush chunk '1-1645008245.491540684.flb', retry in 7 seconds: task_id=0, input=forward.1 > output=cloudwatch_logs.3 (out_id=3)` | 5 | +| `HC_Retry_Failure_Count` | the retry failure count to meet the unhealthy requirement, this is a sum for all output plugins in a defined HC_Period, example for retry failure: `[2022/02/16 20:11:36] [ warn] [engine] chunk '1-1645042288.260516436.flb' cannot be retried: task_id=0, input=tcp.3 > output=cloudwatch_logs.1` | 5 | +| `HC_Period` | The time period by second to count the error and retry failure data point | 60 | -*Note: Not every error log means an error nor be counted, the errors retry failures count only on specific errors which is the example in config table description* +Not every 
error log means an error to be counted. The error retry failures count only +on specific errors, which is the example in configuration table description. -So the feature works as: Based on the HC_Period customer setup, if the real error number is over `HC_Errors_Count` or retry failure is over `HC_Retry_Failure_Count`, fluent bit will be considered as unhealthy. The health endpoint will return HTTP status 500 and String `error`. Otherwise it's healthy, will return HTTP status 200 and string `ok` +Based on the `HC_Period` setting, if the real error number is over `HC_Errors_Count`, +or retry failure is over `HC_Retry_Failure_Count`, Fluent Bit is considered +unhealthy. The health endpoint returns an HTTP status `500` and an `error` message. +Otherwise, the endpoint returns HTTP status `200` and an `ok` message. -The equation is: -``` -health status = (HC_Errors_Count > HC_Errors_Count config value) OR (HC_Retry_Failure_Count > HC_Retry_Failure_Count config value) IN the HC_Period interval +The equation to calculate this behavior is: + +```text +health status = (HC_Errors_Count > HC_Errors_Count config value) OR +(HC_Retry_Failure_Count > HC_Retry_Failure_Count config value) IN +the HC_Period interval ``` -*Note: the HC_Errors_Count and HC_Retry_Failure_Count only count for output plugins and count a sum for errors and retry failures from all output plugins which is running.* -See the config example: +The `HC_Errors_Count` and `HC_Retry_Failure_Count` only count for output plugins and +count a sum for errors and retry failures from all running output plugins. -``` +The following configuration file example shows how to define these settings: + +```yaml [SERVICE] HTTP_Server On HTTP_Listen 0.0.0.0 HTTP_PORT 2020 - Health_Check On - HC_Errors_Count 5 - HC_Retry_Failure_Count 5 - HC_Period 5 + Health_Check On + HC_Errors_Count 5 + HC_Retry_Failure_Count 5 + HC_Period 5 [INPUT] Name cpu @@ -359,53 +419,23 @@ See the config example: Match * ``` -The command to call health endpoint +Use the following command to call the health endpoint: ```bash -$ curl -s http://127.0.0.1:2020/api/v1/health +curl -s http://127.0.0.1:2020/api/v1/health ``` -Based on the fluent bit status, the result will be: - -* HTTP status 200 and "ok" in response to healthy status -* HTTP status 500 and "error" in response for unhealthy status +With the example config, the health status is determined by the following equation: -With the example config, the health status is determined by following equation: -``` +```text Health status = (HC_Errors_Count > 5) OR (HC_Retry_Failure_Count > 5) IN 5 seconds ``` -If (HC_Errors_Count > 5) OR (HC_Retry_Failure_Count > 5) IN 5 seconds is TRUE, then it's unhealthy. - -If (HC_Errors_Count > 5) OR (HC_Retry_Failure_Count > 5) IN 5 seconds is FALSE, then it's healthy. - - -## Calyptia - -[Calyptia](https://calyptia.com/free-trial) is a hosted service that allows you to monitor your Fluent Bit agents including data flow, metrics and configurations. 
- - - -### Get Started with Calyptia Cloud - -Register your Fluent Bit agent will take **less than one minute**, steps: - -* Go to the calyptia core console and sign-in -* On the left menu click on settings and generate/copy your API key - -In your Fluent Bit configuration file, append the following configuration section: - -``` -[CUSTOM] - name calyptia - api_key <YOUR_API_KEY> -``` - -Make sure to replace your API key in the configuration.\ -\ -After a few seconds upon restart your Fluent Bit agent, the Calyptia Cloud Dashboard will list your agent. Metrics will take around 30 seconds to shows up. - +- If this equation evaluates to `TRUE`, then Fluent Bit is unhealthy. +- If this equation evaluates to `FALSE`, then Fluent Bit is healthy. -### Contact Calyptia +## Telemetry Pipeline -If want to get in touch with Calyptia team, just send an email to [hello@calyptia.com](mailto:hello@calyptia.com) +[Telemetry Pipeline](https://chronosphere.io/platform/telemetry-pipeline/) is a +hosted service that allows you to monitor your Fluent Bit agents including data flow, +metrics, and configurations. diff --git a/administration/multithreading.md b/administration/multithreading.md new file mode 100644 index 000000000..8656317ef --- /dev/null +++ b/administration/multithreading.md @@ -0,0 +1,50 @@ +--- +description: Learn how to run Fluent Bit in multiple threads for improved scalability. +--- + +# Multithreading + +Fluent Bit has one event loop to handle critical operations, like managing +timers, receiving internal messages, scheduling flushes, and handling retries. +This event loop runs in the main Fluent Bit thread. + +To free up resources in the main thread, you can configure +[inputs](../pipeline/inputs/README.md) and [outputs](../pipeline/outputs/README.md) +to run in their own self-contained threads. However, inputs and outputs implement +multithreading in distinct ways: inputs can run in `threaded` mode, and outputs +can use one or more `workers`. + +Threading also affects certain processes related to inputs and outputs. For example, +[filters](../pipeline/filters/README.md) always run in the main thread, but +[processors](../pipeline/processors/README.md) run in the self-contained threads of +their respective inputs or outputs, if applicable. + +## Inputs + +When inputs collect telemetry data, they can either perform this process +inside the main Fluent Bit thread or inside a separate dedicated thread. You can +configure this behavior by enabling or disabling the `threaded` setting. + +All inputs are capable of running in threaded mode, but certain inputs always +run in threaded mode regardless of configuration. These always-threaded inputs are: + +- [Kubernetes Events](../pipeline/inputs/kubernetes-events.md) +- [Node Exporter Metrics](../pipeline/inputs/node-exporter-metrics.md) +- [Process Exporter Metrics](../pipeline/inputs/process-exporter-metrics.md) +- [Windows Exporter Metrics](../pipeline/inputs/windows-exporter-metrics.md) + +Inputs aren't internally aware of multithreading. If an input runs in threaded +mode, Fluent Bit manages the logistics of that input's thread. + +## Outputs + +When outputs flush data, they can either perform this operation inside Fluent Bit's +main thread or inside a separate dedicated thread called a _worker_. Each output +can have one or more workers running in parallel, and each worker can handle multiple +concurrent flushes. You can configure this behavior by changing the value of the +`workers` setting. 
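+
+As a minimal sketch, the following YAML fragment shows how the `workers` setting
+might be applied to a hypothetical `stdout` output:
+
+```yaml
+pipeline:
+  outputs:
+    - name: stdout
+      match: '*'
+      # Flush data for this output from two dedicated worker threads.
+      workers: 2
+```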
+ +All outputs are capable of running in multiple workers, and each output has +a default value of `0`, `1`, or `2` workers. However, even if an output uses +workers by default, you can safely reduce the number of workers below the default +or disable workers entirely. diff --git a/administration/networking.md b/administration/networking.md index 8179e6d3c..251645730 100644 --- a/administration/networking.md +++ b/administration/networking.md @@ -1,72 +1,99 @@ # Networking -[Fluent Bit](https://fluentbit.io) implements a unified networking interface that is exposed to components like plugins. This interface abstract all the complexity of general I/O and is fully configurable. +[Fluent Bit](https://fluentbit.io) implements a unified networking interface that's +exposed to components like plugins. This interface abstracts the complexity of +general I/O and is fully configurable. -A common use case is when a component or plugin needs to connect to a service to send and receive data. Despite the operational mode sounds easy to deal with, there are many factors that can make things hard like unresponsive services, networking latency or any kind of connectivity error. The networking interface aims to abstract and simplify the network I/O handling, minimize risks and optimize performance. +A common use case is when a component or plugin needs to connect with a service to send +and receive data. There are many challenges to handle like unresponsive services, +networking latency, or any kind of connectivity error. The networking interface aims +to abstract and simplify the network I/O handling, minimize risks, and optimize +performance. -## Concepts +## Networking concepts -### TCP Connect Timeout +Fluent Bit uses the following networking concepts: -Most of the time creating a new TCP connection to a remote server is straightforward and takes a few milliseconds. But there are cases where DNS resolving, slow network or incomplete TLS handshakes might create long delays, or incomplete connection statuses. +### TCP connect timeout -The `net.connect_timeout` allows to configure the maximum time to wait for a connection to be established, note that this value already considers the TLS handshake process. +Typically, creating a new TCP connection to a remote server is straightforward +and takes a few milliseconds. However, there are cases where DNS resolving, a slow +network, or incomplete TLS handshakes might create long delays, or incomplete +connection statuses. -The `net.connect_timeout_log_error` indicates if an error should be logged in case of connect timeout. If disabled, the timeout is logged as debug level message instead. +- `net.connect_timeout` lets you configure the maximum time to wait for a connection + to be established. This value already considers the TLS handshake process. -### TCP Source Address +- `net.connect_timeout_log_error` indicates if an error should be logged in case of + connect timeout. If disabled, the timeout is logged as a debug level message. -On environments with multiple network interfaces, might be desired to choose which interface to use for our data that will flow through the network. +### TCP source address -The `net.source_address` allows to specify which network address must be used for a TCP connection and data flow. +On environments with multiple network interfaces, you can choose which +interface to use for Fluent Bit data that will flow through the network. 
-### Connection Keepalive
+Use `net.source_address` to specify which network address to use for a TCP connection
+and data flow.
 
-TCP is a _connected oriented_ channel, to deliver and receive data from a remote end-point in most of cases we use a TCP connection. This TCP connection can be created and destroyed once is not longer needed, this approach has pros and cons, here we will refer to the opposite case: keep the connection open.
+### Connection keepalive
 
-The concept of `Connection Keepalive` refers to the ability of the client \(Fluent Bit on this case\) to keep the TCP connection open in a persistent way, that means that once the connection is created and used, instead of close it, it can be recycled. This feature offers many benefits in terms of performance since communication channels are always established before hand.
+A connection keepalive refers to the ability of a client to keep the TCP connection
+open in a persistent way. This feature offers many benefits in terms
+of performance because communication channels are always established beforehand.
 
-Any component that uses TCP channels like HTTP or [TLS](transport-security.md), can take advantage of this feature. For configuration purposes use the `net.keepalive` property.
+Any component that uses TCP channels, like HTTP or [TLS](transport-security.md), can
+take advantage of this feature. For configuration purposes, use the `net.keepalive`
+property.
 
-### Connection Keepalive Idle Timeout
+### Connection keepalive idle timeout
 
-If a connection is keepalive enabled, there might be scenarios where the connection can be unused for long periods of time. Having an idle keepalive connection is not helpful and is recommendable to keep them alive if they are used.
-
-In order to control how long a keepalive connection can be idle, we expose the configuration property called `net.keepalive_idle_timeout`.
+If a connection keepalive is enabled, there might be scenarios where the connection
+can be unused for long periods of time. Unused connections can be removed. To control
+how long a keepalive connection can be idle, Fluent Bit uses a configuration property
+called `net.keepalive_idle_timeout`.
 
 ### DNS mode
 
-If a transport layer protocol is specified, the plugin whose configuration section the `net.dns.mode` setting is specified on overrides the global `dns.mode` value and issues DNS requests using the specified protocol which can be either TCP or UDP
-
-### Max Connections Per Worker
-
-By default, Fluent Bit tries to deliver data as faster as possible and create TCP connections on-demand and in keepalive mode for performance reasons. In high-scalable environments, the user might want to control how many connections are done in parallel by setting a limit.
-
-This can be done by the configuration property called `net.max_worker_connections` that can be used in the output plugins sections.
-This feature acts at the worker level, e.g., if you have 5 workers and `net.max_worker_connections` is set to 10, a max of 50 connections will be allowed.
-If the limit is reached, the output plugin will issue a retry. 
- - -## Configuration Options - -For plugins that rely on networking I/O, the following section describes the network configuration properties available and how they can be used to optimize performance or adjust to different configuration needs: - -| Property | Description | Default | -| :--- |:------------------------------------------------------------------------------------------------------------------------------------------|:--------------| -| `net.connect_timeout` | Set maximum time expressed in seconds to wait for a TCP connection to be established, this include the TLS handshake time. | 10 | -| `net.connect_timeout_log_error` | On connection timeout, specify if it should log an error. When disabled, the timeout is logged as a debug message. | true | -| `net.dns.mode` | Select the primary DNS connection type (TCP or UDP). Can be set in the [SERVICE] section and overridden on a per plugin basis if desired. | | -| `net.dns.prefer_ipv4` | Prioritize IPv4 DNS results when trying to establish a connection. | false | -| `net.dns.resolver`| Select the primary DNS resolver type (LEGACY or ASYNC). | | -| `net.keepalive` | Enable or disable connection keepalive support. Accepts a boolean value: on / off. | on | -| `net.keepalive_idle_timeout` | Set maximum time expressed in seconds for an idle keepalive connection. | 30 | -| `net.keepalive_max_recycle` | Set maximum number of times a keepalive connection can be used before it is retired. | 2000 | -| `net.max_worker_connections` | Set maximum number of TCP connections that can be established per worker. | 0 (unlimited) | -| `net.source_address` | Specify network address to bind for data traffic. | | +The global `dns.mode` value issues DNS requests using the specified protocol, either +TCP or UDP. If a transport layer protocol is specified, plugins that configure the +`net.dns.mode` setting override the global setting. + +### Maximum connections per worker + +For optimal performance, Fluent Bit tries to deliver data quickly and create +TCP connections on-demand and in keepalive mode. In highly scalable +environments, you might limit how many connections are created in +parallel. + +Use the `net.max_worker_connections` property in the output plugin section to set +the maximum number of allowed connections. This property acts at the worker level. +For example, if you have five workers and `net.max_worker_connections` is set +to 10, a maximum of 50 connections is allowed. If the limit is reached, the output +plugin issues a retry. + +## Configuration options + +The following table describes the network configuration properties available and +their usage in optimizing performance or adjusting configuration needs for plugins +that rely on networking I/O: + +| Property | Description | Default | +| :------- |:------------|:--------| +| `net.connect_timeout` | Set maximum time expressed in seconds to wait for a TCP connection to be established, including the TLS handshake time. | `10` | +| `net.connect_timeout_log_error` | On connection timeout, specify if it should log an error. When disabled, the timeout is logged as a debug message. | `true` | +| `net.dns.mode` | Select the primary DNS connection type (TCP or UDP). Can be set in the `[SERVICE]` section and overridden on a per plugin basis if desired. | _none_ | +| `net.dns.prefer_ipv4` | Prioritize IPv4 DNS results when trying to establish a connection. | `false` | +| `net.dns.resolver`| Select the primary DNS resolver type (`LEGACY` or `ASYNC`). 
| _none_ | +| `net.keepalive` | Enable or disable connection keepalive support. Accepts a Boolean value: `on` or `off`. | `on` | +| `net.keepalive_idle_timeout` | Set maximum time expressed in seconds for an idle keepalive connection. | `30` | +| `net.keepalive_max_recycle` | Set maximum number of times a keepalive connection can be used before it's retired. | `2000` | +| `net.max_worker_connections` | Set maximum number of TCP connections that can be established per worker. | `0` (unlimited) | +| `net.source_address` | Specify network address to bind for data traffic. | _none_ | ## Example -As an example, we will send 5 random messages through a TCP output connection, in the remote side we will use `nc` \(netcat\) utility to see the data. +This example sends five random messages through a TCP output connection. The remote +side uses the `nc` (netcat) utility to see the data. Put the following configuration snippet in a file called `fluent-bit.conf`: @@ -96,10 +123,11 @@ Put the following configuration snippet in a file called `fluent-bit.conf`: In another terminal, start `nc` and make it listen for messages on TCP port 9090: ```text -$ nc -l 9090 +nc -l 9090 ``` -Now start Fluent Bit with the configuration file written above and you will see the data flowing to netcat: +Start Fluent Bit with the configuration file you defined previously to see +data flowing to netcat: ```text $ nc -l 9090 @@ -110,6 +138,8 @@ $ nc -l 9090 {"date":1587769736.572277,"rand_value":527581343064950185} ``` -If the `net.keepalive` option is not enabled, Fluent Bit will close the TCP connection and netcat will quit, here we can see how the keepalive connection works. +If the `net.keepalive` option isn't enabled, Fluent Bit closes the TCP connection +and netcat quits. -After the 5 records arrive, the connection will keep idle and after 10 seconds it will be closed due to `net.keepalive_idle_timeout`. +After the five records arrive, the connection idles. After 10 seconds, the connection +closes due to `net.keepalive_idle_timeout`. diff --git a/administration/performance.md b/administration/performance.md new file mode 100644 index 000000000..1850bf2de --- /dev/null +++ b/administration/performance.md @@ -0,0 +1,57 @@ +# Performance Tips + +Fluent Bit is designed for high performance and minimal resource usage. Depending on your use case, you can optimize further using specific configuration options to achieve faster performance or reduce resource consumption. + +## Reading Files with Tail + +The `Tail` input plugin is used to read data from files on the filesystem. By default, it uses a small memory buffer of `32KB` per monitored file. While this is sufficient for most generic use cases and helps keep memory usage low when monitoring many files, there are scenarios where you may want to increase performance by using more memory. + +If your files are typically larger than `32KB`, consider increasing the buffer size to speed up file reading. For example, you can experiment with a buffer size of `128KB`: + +```yaml +pipeline: + inputs: + - name: tail + path: '/var/log/containers/*.log' + buffer_chunk_size: 128kb + buffer_max_size: 128kb +``` + +By increasing the buffer size, Fluent Bit will make fewer system calls (read(2)) to read the data, reducing CPU usage and improving performance. + +## Fluent Bit and SIMD for JSON Encoding + +Starting in Fluent Bit v3.2, performance improvements have been introduced for JSON encoding. 
Plugins that convert logs from Fluent Bit’s internal binary representation to JSON can now do so up to 30% faster using SIMD (Single Instruction, Multiple Data) optimizations.
+
+### Enabling SIMD Support
+
+Ensure that your Fluent Bit binary is built with SIMD support. This feature is available for architectures such as x86_64, amd64, aarch64, and arm64. Currently, SIMD is only enabled by default in Fluent Bit container images.
+
+You can check if SIMD is enabled by looking for the following log entry when Fluent Bit starts:
+
+```text
+[2024/11/10 22:25:53] [ info] [fluent bit] version=3.2.0, commit=12cb22e0e9, pid=74359
+[2024/11/10 22:25:53] [ info] [storage] ver=1.5.2, type=memory, sync=normal, checksum=off, max_chunks_up=128
+[2024/11/10 22:25:53] [ info] [simd ] SSE2
+[2024/11/10 22:25:53] [ info] [cmetrics] version=0.9.8
+[2024/11/10 22:25:53] [ info] [ctraces ] version=0.5.7
+[2024/11/10 22:25:53] [ info] [sp] stream processor started
+```
+
+Look for the `simd` entry, which indicates the SIMD support type, such as `SSE2`, `NEON`, or `none`.
+
+If your Fluent Bit binary was not built with SIMD enabled and you are using a supported platform, you can build Fluent Bit from source using the CMake option `-DFLB_SIMD=On`.
+
+## Run input plugins in threaded mode
+
+By default, most input plugins run in the same system thread as the main event loop. However, you can configure them to run in a separate thread, which lets you take advantage of other CPU cores in your system.
+
+To run an input plugin in threaded mode, add `threaded: true` as in the following example:
+
+```yaml
+pipeline:
+  inputs:
+    - name: tail
+      path: '/var/log/containers/*.log'
+      threaded: true
+```
diff --git a/administration/scheduling-and-retries.md b/administration/scheduling-and-retries.md
index 67100acc7..d5d7496b1 100644
--- a/administration/scheduling-and-retries.md
+++ b/administration/scheduling-and-retries.md
@@ -1,29 +1,40 @@
 # Scheduling and Retries
 
-[Fluent Bit](https://fluentbit.io) has an Engine that helps to coordinate the data ingestion from input plugins and calls the _Scheduler_ to decide when it is time to flush the data through one or multiple output plugins. The Scheduler flushes new data at a fixed time of seconds and the _Scheduler_ retries when asked.
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=a70a6008-106f-43c8-8930-243806371482" />
 
-Once an output plugin gets called to flush some data, after processing that data it can notify the Engine three possible return statuses:
+[Fluent Bit](https://fluentbit.io) has an engine that helps to coordinate the data
+ingestion from input plugins. The engine calls the _scheduler_ to decide when it's time to
+flush the data through one or multiple output plugins. The scheduler flushes new data
+at a fixed number of seconds, and retries when asked.
 
-* OK
-* Retry
-* Error
+When an output plugin gets called to flush some data, after processing that data it
+can notify the engine using these possible return statuses:
 
-If the return status was **OK**, it means it was successfully able to process and flush the data. If it returned an **Error** status, it means that an unrecoverable error happened and the engine should not try to flush that data again. If a **Retry** was requested, the _Engine_ will ask the _Scheduler_ to retry to flush that data, the Scheduler will decide how many seconds to wait before that happens.
+- `OK`: Data successfully processed and flushed. 
+- `Retry`: If a retry is requested, the engine asks the scheduler to retry flushing + that data. The scheduler decides how many seconds to wait before retry. +- `Error`: An unrecoverable error occurred and the engine shouldn't try to flush that data again. -## Configuring Wait Time for Retry +## Configure wait time for retry -The Scheduler provides two configuration options called **scheduler.cap** and **scheduler.base** which can be set in the Service section. +The scheduler provides two configuration options, called `scheduler.cap` and +`scheduler.base`, which can be set in the Service section. These determine the waiting +time before a retry happens. -| Key | Description | Default Value | -| -- | ------------| --------------| -| scheduler.cap | Set a maximum retry time in seconds. The property is supported from v1.8.7. | 2000 | -| scheduler.base | Set a base of exponential backoff. The property is supported from v1.8.7. | 5 | +| Key | Description | Default | +| --- | ------------| --------------| +| `scheduler.cap` | Set a maximum retry time in seconds. Supported in v1.8.7 or later. | `2000` | +| `scheduler.base` | Set a base of exponential backoff. Supported in v1.8.7 or later. | `5` | -These two configuration options determine the waiting time before a retry will happen. +The `scheduler.base` determines the lower bound of time and the `scheduler.cap` +determines the upper bound for each retry. -Fluent Bit uses an exponential backoff and jitter algorithm to determine the waiting time before a retry. +Fluent Bit uses an exponential backoff and jitter algorithm to determine the waiting +time before a retry. The waiting time is a random number between a configurable upper +and lower bound. For a detailed explanation of the exponential backoff and jitter algorithm, see +[Exponential Backoff And Jitter](https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/). -The waiting time is a random number between a configurable upper and lower bound. +For example: For the Nth retry, the lower bound of the random number will be: @@ -33,23 +44,26 @@ The upper bound will be: `min(base * (Nth power of 2), cap)` -Given an example where `base` is set to 3 and `cap` is set to 30. +For example: -1st retry: The lower bound will be 3, the upper bound will be 3 * 2 = 6. So the waiting time will be a random number between (3, 6). +When `base` is set to 3 and `cap` is set to 30: -2nd retry: the lower bound will be 3, the upper bound will be 3 * (2 * 2) = 12. So the waiting time will be a random number between (3, 12). +First retry: The lower bound will be 3. The upper bound will be `3 * 2 = 6`. +The waiting time will be a random number between (3, 6). -3rd retry: the lower bound will be 3, the upper bound will be 3 * (2 * 2 * 2) = 24. So the waiting time will be a random number between (3, 24). +Second retry: The lower bound will be 3. The upper bound will be `3 * (2 * 2) = 12`. +The waiting time will be a random number between (3, 12). -4th retry: the lower bound will be 3, since 3 * (2 * 2 * 2 * 2) = 48 > 30, the upper bound will be 30. So the waiting time will be a random number between (3, 30). +Third retry: The lower bound will be 3. The upper bound will be `3 * (2 * 2 * 2) =24`. +The waiting time will be a random number between (3, 24). -Basically, the **scheduler.base** determines the lower bound of time between each retry and the **scheduler.cap** determines the upper bound. +Fourth retry: The lower bound will be 3, because `3 * (2 * 2 * 2 * 2) = 48` > `30`. 
+The upper bound will be 30. The waiting time will be a random number between (3, 30).
 
-For a detailed explanation of the exponential backoff and jitter algorithm, please check this [blog](https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/).
+### Wait time example
 
-### Example
-
-The following example configures the **scheduler.base** as 3 seconds and **scheduler.cap** as 30 seconds.
+The following example configures the `scheduler.base` as `3` seconds and
+`scheduler.cap` as `30` seconds.
 
 ```text
 [SERVICE]
@@ -62,26 +76,29 @@ The following example configures the **scheduler.base** as 3 seconds and **sched
 
 The waiting time will be:
 
-| Nth retry | waiting time range (seconds) |
-| --- | --- |
+| Nth retry | Waiting time range (seconds) |
+| --- | --- |
 | 1 | (3, 6) |
 | 2 | (3, 12) |
 | 3 | (3, 24) |
 | 4 | (3, 30) |
 
-## Configuring Retries
+## Configure retries
 
-The Scheduler provides a simple configuration option called **Retry\_Limit**, which can be set independently on each output section. This option allows us to disable retries or impose a limit to try N times and then discard the data after reaching that limit:
+The scheduler provides a configuration option called `Retry_Limit`, which can be set
+independently for each output section. This option lets you disable retries or
+impose a limit to try N times and then discard the data after reaching that limit:
 
 | | Value | Description |
 | :--- | :--- | :--- |
-| Retry\_Limit | N | Integer value to set the maximum number of retries allowed. N must be >= 1 \(default: 1\) |
-| Retry\_Limit | `no_limits` or `False` | When Retry\_Limit is set to `no_limits` or`False`, means that there is not limit for the number of retries that the Scheduler can do. |
-| Retry\_Limit | no\_retries | When Retry\_Limit is set to no\_retries, means that retries are disabled and Scheduler would not try to send data to the destination if it failed the first time. |
+| `Retry_Limit` | N | Integer value to set the maximum number of retries allowed. N must be >= 1 (default: `1`). |
+| `Retry_Limit` | `no_limits` or `False` | When set, there's no limit for the number of retries that the scheduler can do. |
+| `Retry_Limit` | `no_retries` | When set, retries are disabled and the scheduler doesn't try to send data to the destination if it failed the first time. |
 
-### Example
+### Retry example
 
-The following example configures two outputs where the HTTP plugin has an unlimited number of while the Elasticsearch plugin have a limit of 5 retries:
+The following example configures two outputs, where the HTTP plugin has an unlimited
+number of retries, and the Elasticsearch plugin has a limit of `5` retries:
 
 ```text
 [OUTPUT]
@@ -97,4 +114,3 @@ The following example configures two outputs where the HTTP plugin has an unlimi
 Logstash_Format On
 Retry_Limit 5
 ```
-
diff --git a/administration/transport-security.md b/administration/transport-security.md
index cc06f4125..42cb43bc9 100644
--- a/administration/transport-security.md
+++ b/administration/transport-security.md
@@ -1,82 +1,93 @@
 # Transport Security
 
-Fluent Bit provides integrated support for _Transport Layer Security_ \(TLS\) and it predecessor _Secure Sockets Layer_ \(SSL\) respectively. In this section we will refer as TLS only for both implementations.
+Fluent Bit provides integrated support for Transport Layer Security (TLS) and
+its predecessor Secure Sockets Layer (SSL). This section refers only
+to TLS for both implementations. 
-
-Both input and output plugins that perform Network I/O can optionally enable TLS and configure the behavior. The following table describes the properties available:
+Both input and output plugins that perform Network I/O can optionally enable TLS and
+configure the behavior. The following table describes the properties available:
 
 | Property | Description | Default |
 | :--- | :--- | :--- |
-| tls | enable or disable TLS support | Off |
-| tls.verify | force certificate validation | On |
-| tls.debug | Set TLS debug verbosity level. It accept the following values: 0 \(No debug\), 1 \(Error\), 2 \(State change\), 3 \(Informational\) and 4 Verbose | 1 |
-| tls.ca\_file | absolute path to CA certificate file | |
-| tls.ca\_path | absolute path to scan for certificate files | |
-| tls.crt\_file | absolute path to Certificate file | |
-| tls.key\_file | absolute path to private Key file | |
-| tls.key\_passwd | optional password for tls.key\_file file | |
-| tls.vhost | hostname to be used for TLS SNI extension | |
-
-*Note : in order to use TLS on input plugins the user is expected to provide both a certificate and private key*
-
-The listed properties can be enabled in the configuration file, specifically on each output plugin section or directly through the command line.
+| `tls` | Enable or disable TLS support. | `Off` |
+| `tls.verify` | Force certificate validation. | `On` |
+| `tls.verify_hostname` | Force TLS verification of host names. | `Off` |
+| `tls.debug` | Set TLS debug verbosity level. Accepted values: `0` (No debug), `1` (Error), `2` (State change), `3` (Informational), and `4` (Verbose). | `1` |
+| `tls.ca_file` | Absolute path to CA certificate file. | _none_ |
+| `tls.ca_path` | Absolute path to scan for certificate files. | _none_ |
+| `tls.crt_file` | Absolute path to certificate file. | _none_ |
+| `tls.key_file` | Absolute path to private key file. | _none_ |
+| `tls.key_passwd` | Optional password for the `tls.key_file` file. | _none_ |
+| `tls.vhost` | Hostname to be used for TLS SNI extension. | _none_ |
+
+To use TLS on input plugins, you must provide both a certificate and a
+private key.
+
+The listed properties can be enabled in the configuration file, specifically in each
+output plugin section or directly through the command line. 
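+
+For example, a minimal sketch of an output section that sets several of these
+properties might look like the following (the host and certificate path are
+placeholders):
+
+```python
+[OUTPUT]
+    Name        forward
+    Match       *
+    Host        fluent.example.com
+    Port        24224
+    tls         On
+    tls.verify  On
+    tls.ca_file /etc/certs/fluent.crt
+```
+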
The following **output** plugins can take advantage of the TLS feature: -* [Amazon S3](../pipeline/outputs/s3.md) -* [Apache SkyWalking](../pipeline/outputs/skywalking.md) -* [Azure](../pipeline/outputs/azure.md) -* [Azure Blob](../pipeline/outputs/azure_blob.md) -* [Azure Data Explorer (Kusto)](../pipeline/outputs/azure_kusto.md) -* [Azure Logs Ingestion API](../pipeline/outputs/azure_logs_ingestion.md) -* [BigQuery](../pipeline/outputs/bigquery.md) -* [Datadog](../pipeline/outputs/datadog.md) -* [Elasticsearch](../pipeline/outputs/elasticsearch.md) -* [Forward](../pipeline/outputs/forward.md) -* [GELF](../pipeline/outputs/gelf.md) -* [Google Chronicle](../pipeline/outputs/chronicle.md) -* [HTTP](../pipeline/outputs/http.md) -* [InfluxDB](../pipeline/outputs/influxdb.md) -* [Kafka REST Proxy](../pipeline/outputs/kafka-rest-proxy.md) -* [LogDNA](../pipeline/outputs/logdna.md) -* [Loki](../pipeline/outputs/loki.md) -* [New Relic](../pipeline/outputs/new-relic.md) -* [OpenSearch](../pipeline/outputs/opensearch.md) -* [OpenTelemetry](../pipeline/outputs/opentelemetry.md) -* [Oracle Cloud Infrastructure Logging Analytics](../pipeline/outputs/oci-logging-analytics.md) -* [Prometheus Remote Write](../pipeline/outputs/prometheus-remote-write.md) -* [Slack](../pipeline/outputs/slack.md) -* [Splunk](../pipeline/outputs/splunk.md) -* [Stackdriver](../pipeline/outputs/stackdriver.md) -* [Syslog](../pipeline/outputs/syslog.md) -* [TCP & TLS](../pipeline/outputs/tcp-and-tls.md) -* [Treasure Data](../pipeline/outputs/treasure-data.md) -* [WebSocket](../pipeline/outputs/websocket.md) +- [Amazon S3](../pipeline/outputs/s3.md) +- [Apache SkyWalking](../pipeline/outputs/skywalking.md) +- [Azure](../pipeline/outputs/azure.md) +- [Azure Blob](../pipeline/outputs/azure_blob.md) +- [Azure Data Explorer (Kusto)](../pipeline/outputs/azure_kusto.md) +- [Azure Logs Ingestion API](../pipeline/outputs/azure_logs_ingestion.md) +- [BigQuery](../pipeline/outputs/bigquery.md) +- [Dash0](../pipeline/outputs/dash0.md) +- [Datadog](../pipeline/outputs/datadog.md) +- [Elasticsearch](../pipeline/outputs/elasticsearch.md) +- [Forward](../pipeline/outputs/forward.md) +- [GELF](../pipeline/outputs/gelf.md) +- [Google Chronicle](../pipeline/outputs/chronicle.md) +- [HTTP](../pipeline/outputs/http.md) +- [InfluxDB](../pipeline/outputs/influxdb.md) +- [Kafka REST Proxy](../pipeline/outputs/kafka-rest-proxy.md) +- [LogDNA](../pipeline/outputs/logdna.md) +- [Loki](../pipeline/outputs/loki.md) +- [New Relic](../pipeline/outputs/new-relic.md) +- [OpenSearch](../pipeline/outputs/opensearch.md) +- [OpenTelemetry](../pipeline/outputs/opentelemetry.md) +- [Oracle Cloud Infrastructure Logging Analytics](../pipeline/outputs/oci-logging-analytics.md) +- [Prometheus Remote Write](../pipeline/outputs/prometheus-remote-write.md) +- [Slack](../pipeline/outputs/slack.md) +- [Splunk](../pipeline/outputs/splunk.md) +- [Stackdriver](../pipeline/outputs/stackdriver.md) +- [Syslog](../pipeline/outputs/syslog.md) +- [TCP & TLS](../pipeline/outputs/tcp-and-tls.md) +- [Treasure Data](../pipeline/outputs/treasure-data.md) +- [WebSocket](../pipeline/outputs/websocket.md) The following **input** plugins can take advantage of the TLS feature: -* [Docker Events](../pipeline/inputs/docker-events.md) -* [Elasticsearch (Bulk API)](../pipeline/inputs/elasticsearch.md) -* [Forward](../pipeline/inputs/forward.md) -* [Health](../pipeline/inputs/health.md) -* [HTTP](../pipeline/inputs/http.md) -* [Kubernetes Events](../pipeline/inputs/kubernetes-events.md) -* 
[MQTT](../pipeline/inputs/mqtt.md) -* [NGINX Exporter Metrics](../pipeline/inputs/nginx.md) -* [OpenTelemetry](../pipeline/inputs/opentelemetry.md) -* [Prometheus Scrape Metrics](../pipeline/inputs/prometheus-scrape-metrics.md) -* [Splunk (HTTP HEC)](../pipeline/inputs/splunk.md) -* [Syslog](../pipeline/inputs/syslog.md) -* [TCP](../pipeline/inputs/tcp.md) - -In addition, other plugins implements a sub-set of TLS support, meaning, with restricted configuration: - -* [Kubernetes Filter](../pipeline/filters/kubernetes.md) +- [Docker Events](../pipeline/inputs/docker-events.md) +- [Elasticsearch (Bulk API)](../pipeline/inputs/elasticsearch.md) +- [Forward](../pipeline/inputs/forward.md) +- [Health](../pipeline/inputs/health.md) +- [HTTP](../pipeline/inputs/http.md) +- [Kubernetes Events](../pipeline/inputs/kubernetes-events.md) +- [MQTT](../pipeline/inputs/mqtt.md) +- [NGINX Exporter Metrics](../pipeline/inputs/nginx.md) +- [OpenTelemetry](../pipeline/inputs/opentelemetry.md) +- [Prometheus Scrape Metrics](../pipeline/inputs/prometheus-scrape-metrics.md) +- [Prometheus Remote Write](../pipeline/inputs/prometheus-remote-write.md) +- [Splunk (HTTP HEC)](../pipeline/inputs/splunk.md) +- [Syslog](../pipeline/inputs/syslog.md) +- [TCP](../pipeline/inputs/tcp.md) + +In addition, other plugins implement a subset of TLS support, with +restricted configuration: + +- [Kubernetes Filter](../pipeline/filters/kubernetes.md) ## Example: enable TLS on HTTP input -By default HTTP input plugin uses plain TCP, enabling TLS from the command line can be done with: -```text +By default, the HTTP input plugin uses plain TCP. Run the following command to enable +TLS: + +```bash ./bin/fluent-bit -i http \ -p port=9999 \ -p tls=on \ @@ -87,11 +98,12 @@ By default HTTP input plugin uses plain TCP, enabling TLS from the command line -m '*' ``` -In the command line above, the two properties _tls_ and _tls.verify_ where enabled for demonstration purposes \(we strongly suggest always keep verification ON\). +In the previous command, the two properties `tls` and `tls.verify` are set +for demonstration purposes. Always enable verification in production environments. The same behavior can be accomplished using a configuration file: -```text +```python [INPUT] name http port 9999 @@ -107,20 +119,22 @@ The same behavior can be accomplished using a configuration file: ## Example: enable TLS on HTTP output -By default HTTP output plugin uses plain TCP, enabling TLS from the command line can be done with: +By default, the HTTP output plugin uses plain TCP. Run the following command to enable +TLS: -```text -$ fluent-bit -i cpu -t cpu -o http://192.168.2.3:80/something \ +```bash +fluent-bit -i cpu -t cpu -o http://192.168.2.3:80/something \ -p tls=on \ -p tls.verify=off \ -m '*' ``` -In the command line above, the two properties _tls_ and _tls.verify_ where enabled for demonstration purposes \(we strongly suggest always keep verification ON\). +In the previous command, the properties `tls` and `tls.verify` are enabled +for demonstration purposes. Always enable verification in production environments. The same behavior can be accomplished using a configuration file: -```text +```python [INPUT] Name cpu Tag cpu @@ -137,11 +151,14 @@ The same behavior can be accomplished using a configuration file: ## Tips and Tricks -### Generate your own self signed certificates for testing purposes. 
+### Generate a self signed certificates for testing purposes -This will generate a 4096 bit RSA key pair and a certificate that is signed using SHA-256 with the expiration date set to 30 days in the future, `test.host.net` set as common name and since we opted out of `DES` the private key will be stored in plain text. +The following command generates a 4096 bit RSA key pair and a certificate that's signed +using `SHA-256` with the expiration date set to 30 days in the future. In this example, +`test.host.net` is set as the common name. This example opts out of `DES`, so the +private key is stored in plain text. -``` +```bash openssl req -x509 \ -newkey rsa:4096 \ -sha256 \ @@ -153,9 +170,12 @@ openssl req -x509 \ ### Connect to virtual servers using TLS -Fluent Bit supports [TLS server name indication](https://en.wikipedia.org/wiki/Server_Name_Indication). If you are serving multiple hostnames on a single IP address \(a.k.a. virtual hosting\), you can make use of `tls.vhost` to connect to a specific hostname. +Fluent Bit supports +[TLS server name indication](https://en.wikipedia.org/wiki/Server_Name_Indication). +If you are serving multiple host names on a single IP address (for example, using +virtual hosting), you can make use of `tls.vhost` to connect to a specific hostname. -```text +```python [INPUT] Name cpu Tag cpu @@ -170,3 +190,43 @@ Fluent Bit supports [TLS server name indication](https://en.wikipedia.org/wiki/S tls.ca_file /etc/certs/fluent.crt tls.vhost fluent.example.com ``` + +### Verify `subjectAltName` + +By default, TLS verification of host names isn't done automatically. +As an example, you can extract the X509v3 Subject Alternative Name from a certificate: + +```text +X509v3 Subject Alternative Name: + DNS:my.fluent-aggregator.net +``` + +This certificate covers only `my.fluent-aggregator.net` so if you use a different +hostname it should fail. + +To fully verify the alternative name and demonstrate the failure, enable +`tls.verify_hostname`: + +```python +[INPUT] + Name cpu + Tag cpu + +[OUTPUT] + Name forward + Match * + Host other.fluent-aggregator.net + Port 24224 + tls On + tls.verify On + tls.verify_hostname on + tls.ca_file /path/to/fluent-x509v3-alt-name.crt +``` + +This outgoing connect will fail and disconnect: + +```text +[2024/06/17 16:51:31] [error] [tls] error: unexpected EOF with reason: certificate verify failed +[2024/06/17 16:51:31] [debug] [upstream] connection #50 failed to other.fluent-aggregator.net:24224 +[2024/06/17 16:51:31] [error] [output:forward:forward.0] no upstream connections available +``` diff --git a/administration/troubleshooting.md b/administration/troubleshooting.md index cae5ad2ce..a0b0c93d5 100644 --- a/administration/troubleshooting.md +++ b/administration/troubleshooting.md @@ -1,16 +1,18 @@ # Troubleshooting -* [Tap Functionality: generate events or records](troubleshooting.md#tap-functionality) -* [Dump Internals Signal](troubleshooting#dump-internals-signal) +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=759ddb3d-b363-4ee6-91fa-21025259767a" /> -## Tap Functionality +- [Tap: generate events or records](troubleshooting.md#tap) +- [Dump internals signal](troubleshooting#dump-internals-signal) + +## Tap Tap can be used to generate events or records detailing what messages pass through Fluent Bit, at what time and what filters affect them. 
-### Simple example +### Basic Tap example -First, we will make sure that the container image we are going to use actually supports Fluent Bit Tap (available in Fluent Bit 2.0+): +Ensure that the container image supports Fluent Bit Tap (available in Fluent Bit 2.0+): ```shell $ docker run --rm -ti fluent/fluent-bit:latest --help | grep trace @@ -21,9 +23,11 @@ $ docker run --rm -ti fluent/fluent-bit:latest --help | grep trace --trace setup a trace pipeline on startup. Uses a single line, ie: "input=dummy.0 output=stdout output.format='json'" ``` -If the `--enable-chunk-trace` option is present it means Fluent Bit has support for Fluent Bit Tap but it is disabled by default, so remember to enable it with this option. +If the `--enable-chunk-trace` option is present, your Fluent Bit version supports +Fluent Bit Tap, but it's disabled by default. Use this option to enable it. -You can start fluent-bit with tracing activated from the beginning by using the `trace-input` and `trace-output` properties, like so: +You can start Fluent Bit with tracing activated from the beginning by using the +`trace-input` and `trace-output` properties: ```bash $ fluent-bit -Z -i dummy -o stdout -f 1 --trace-input=dummy.0 --trace-output=stdout @@ -73,13 +77,13 @@ Fluent Bit v2.1.8 [2023/07/21 16:27:07] [ info] [output:stdout:stdout.0] thread worker #0 stopped ``` -If you see the following warning then the `-Z` or `--enable-chunk-tracing` option is missing: +The following warning indicates the `-Z` or `--enable-chunk-tracing` option is missing: -```bash +```text [2023/07/21 16:26:42] [ warn] [chunk trace] enable chunk tracing via the configuration or command line to be able to activate tracing. ``` -Properties can be set for the output using the `--trace-output-property` option: +Set properties for the output using the `--trace-output-property` option: ```bash $ fluent-bit -Z -i dummy -o stdout -f 1 --trace-input=dummy.0 --trace-output=stdout --trace-output-property=format=json_lines @@ -111,25 +115,25 @@ Fluent Bit v2.1.8 [0] dummy.0: [[1689971342.068613646, {}], {"message"=>"dummy"}] ``` -With that options set the stdout plugin is now emitting traces in `json_lines` format: +With that option set, the stdout plugin emits traces in `json_lines` format: ```json {"date":1689971340.068745,"type":1,"trace_id":"0","plugin_instance":"dummy.0","records":[{"timestamp":1689971340,"record":{"message":"dummy"}}],"start_time":1689971340,"end_time":1689971340} ``` -All three options can also be defined using the much more flexible `--trace` option: +All three options can also be defined using the more flexible `--trace` option: ```bash -$ fluent-bit -Z -i dummy -o stdout -f 1 --trace="input=dummy.0 output=stdout output.format=json_lines" +fluent-bit -Z -i dummy -o stdout -f 1 --trace="input=dummy.0 output=stdout output.format=json_lines" ``` -We defined the entire tap pipeline using this configuration: `input=dummy.0 output=stdout output.format=json_lines` which defines the following: +This example defines the Tap pipeline using this configuration: `input=dummy.0 output=stdout output.format=json_lines` which defines the following: - * input: dummy.0 (listens to the tag and/or alias `dummy.0`) - * output: stdout (outputs to a stdout plugin) - * output.format: json_lines (sets the stdout format o `json_lines`) +- `input`: `dummy.0` listens to the tag or alias `dummy.0`. +- `output`: `stdout` outputs to a stdout plugin. +- `output.format`: `json_lines` sets the stdout format to `json_lines`. 
-Tap support can also be activated and deactivated via the embedded web server: +Tap support can also be activated and deactivated using the embedded web server: ```shell $ docker run --rm -ti -p 2020:2020 fluent/fluent-bit:latest -Z -H -i dummy -p alias=input_dummy -o stdout -f 1 @@ -152,17 +156,16 @@ Fluent Bit v2.0.0 ``` -In another terminal we can activate Tap by either using the instance id of the input; `dummy.0` or its alias. - -Since the alias is more predictable that is what we will use: - +In another terminal, activate Tap by either using the instance id of the input +(`dummy.0`) or its alias. The alias is more predictable, and is used here: ```shell $ curl 127.0.0.1:2020/api/v1/trace/input_dummy {"status":"ok"} ``` -This response means we have activated Tap, the terminal with Fluent Bit running should now look like this: +This response means Tap is active. The terminal with Fluent Bit running should now +look like this: ```shell [0] dummy.0: [1666346615.203253156, {"message"=>"dummy"}] @@ -183,38 +186,42 @@ This response means we have activated Tap, the terminal with Fluent Bit running ``` -All the records that now appear are those emitted by the activities of the dummy plugin. +All the records that display are those emitted by the activities of the dummy plugin. -### Complex example +### Complex Tap example -This example takes the same steps but demonstrates the same mechanism works with more complicated configurations. -In this example we will follow a single input of many which passes through several filters. +This example takes the same steps but demonstrates how the mechanism works with more +complicated configurations. -``` +This example follows a single input, out of many, and which passes through several +filters. + +```shell $ docker run --rm -ti -p 2020:2020 \ - fluent/fluent-bit:latest \ - -Z -H \ - -i dummy -p alias=dummy_0 -p \ - dummy='{"dummy": "dummy_0", "key_name": "foo", "key_cnt": "1"}' \ - -i dummy -p alias=dummy_1 -p dummy='{"dummy": "dummy_1"}' \ - -i dummy -p alias=dummy_2 -p dummy='{"dummy": "dummy_2"}' \ - -F record_modifier -m 'dummy.0' -p record="powered_by fluent" \ - -F record_modifier -m 'dummy.1' -p record="powered_by fluent-bit" \ - -F nest -m 'dummy.0' \ - -p operation=nest -p wildcard='key_*' -p nest_under=data \ - -o null -m '*' -f 1 + fluent/fluent-bit:latest \ + -Z -H \ + -i dummy -p alias=dummy_0 -p \ + dummy='{"dummy": "dummy_0", "key_name": "foo", "key_cnt": "1"}' \ + -i dummy -p alias=dummy_1 -p dummy='{"dummy": "dummy_1"}' \ + -i dummy -p alias=dummy_2 -p dummy='{"dummy": "dummy_2"}' \ + -F record_modifier -m 'dummy.0' -p record="powered_by fluent" \ + -F record_modifier -m 'dummy.1' -p record="powered_by fluent-bit" \ + -F nest -m 'dummy.0' \ + -p operation=nest -p wildcard='key_*' -p nest_under=data \ + -o null -m '*' -f 1 ``` -To make sure the window is not cluttered by the actual records generated by the input plugins we send all of it to `null`. +To ensure the window isn't cluttered by the records generated by the input plugins, +send all of it to `null`. 
-We activate with the following 'curl' command: +Activate with the following `curl` command: ```shell $ curl 127.0.0.1:2020/api/v1/trace/dummy_0 {"status":"ok"} ``` -Now we should start seeing output similar to the following: +You should start seeing output similar to the following: ```shell [0] trace: [1666349359.325597543, {"type"=>1, "trace_id"=>"trace.0", "plugin_instance"=>"dummy.0", "plugin_alias"=>"dummy_0", "records"=>[{"timestamp"=>1666349359, "record"=>{"dummy"=>"dummy_0", "key_name"=>"foo", "key_cnt"=>"1"}}], "start_time"=>1666349359, "end_time"=>1666349359}] @@ -249,12 +256,18 @@ Now we should start seeing output similar to the following: [2022/10/21 10:49:25] [ info] [output:null:null.0] thread worker #0 stopping... [2022/10/21 10:49:25] [ info] [output:null:null.0] thread worker #0 stopped ``` -### Parameters for the output in Tap -When activating Tap, any plugin parameter can be given. These can be used to modify, for example, the output format, the name of the time key, the format of the date, etc. -In the next example we will use the parameter ```"format": "json"``` to demonstrate how in Tap, stdout can be shown in Json format. +### Parameters for the output in Tap + +When activating Tap, any plugin parameter can be given. These parameters can be used +to modify the output format, the name of the time key, the format of the date, and +other details. + +The following example uses the parameter `"format": "json"` to demonstrate how +to show `stdout` in JSON format. First, run Fluent Bit enabling Tap: + ```shell $ docker run --rm -ti -p 2020:2020 fluent/fluent-bit:latest -Z -H -i dummy -p alias=input_dummy -o stdout -f 1 Fluent Bit v2.0.8 @@ -275,108 +288,102 @@ Fluent Bit v2.0.8 [0] dummy.0: [1674805466.973669512, {"message"=>"dummy"}] ... 
``` -Next, in another terminal, we activate Tap including the output, in this case stdout, and the parameters wanted, in this case ```"format": "json"```: + +In another terminal, activate Tap including the output (`stdout`), and the +parameters wanted (`"format": "json"`): ```shell $ curl 127.0.0.1:2020/api/v1/trace/input_dummy -d '{"output":"stdout", "params": {"format": "json"}}' {"status":"ok"} ``` -In the first terminal, we should be seeing the output similar to the following: + +In the first terminal, you should see the output similar to the following: + ```shell [0] dummy.0: [1674805635.972373840, {"message"=>"dummy"}] [{"date":1674805634.974457,"type":1,"trace_id":"0","plugin_instance":"dummy.0","plugin_alias":"input_dummy","records":[{"timestamp":1674805634,"record":{"message":"dummy"}}],"start_time":1674805634,"end_time":1674805634},{"date":1674805634.974605,"type":3,"trace_id":"0","plugin_instance":"dummy.0","plugin_alias":"input_dummy","records":[{"timestamp":1674805634,"record":{"message":"dummy"}}],"start_time":1674805634,"end_time":1674805634},{"date":1674805635.972398,"type":1,"trace_id":"1","plugin_instance":"dummy.0","plugin_alias":"input_dummy","records":[{"timestamp":1674805635,"record":{"message":"dummy"}}],"start_time":1674805635,"end_time":1674805635},{"date":1674805635.972413,"type":3,"trace_id":"1","plugin_instance":"dummy.0","plugin_alias":"input_dummy","records":[{"timestamp":1674805635,"record":{"message":"dummy"}}],"start_time":1674805635,"end_time":1674805635}] [0] dummy.0: [1674805636.973970215, {"message"=>"dummy"}] [{"date":1674805636.974008,"type":1,"trace_id":"2","plugin_instance":"dummy.0","plugin_alias":"input_dummy","records":[{"timestamp":1674805636,"record":{"message":"dummy"}}],"start_time":1674805636,"end_time":1674805636},{"date":1674805636.974034,"type":3,"trace_id":"2","plugin_instance":"dummy.0","plugin_alias":"input_dummy","records":[{"timestamp":1674805636,"record":{"message":"dummy"}}],"start_time":1674805636,"end_time":1674805636}] ``` -This parameter shows stdout in Json format, however, as mentioned before, parameters can be passed to any plugin. -Please visit the following link for more information on other output plugins: -https://docs.fluentbit.io/manual/pipeline/outputs +This parameter shows stdout in JSON format. -### Analysis of a single Tap record +See [output plugins](https://docs.fluentbit.io/manual/pipeline/outputs) for +additional information. -Here we analyze a single record from a filter event to explain the meaning of each field in detail. -We chose a filter record since it includes the most details of all the record types. +### Analyze a single Tap record + +This filter record is an example to explain the details of a Tap record: ```json { - "type": 2, - "start_time": 1666349231, - "end_time": 1666349231, - "trace_id": "trace.1", - "plugin_instance": "nest.2", - "records": [{ - "timestamp": 1666349231, - "record": { - "dummy": "dummy_0", - "powered_by": "fluent", - "data": { - "key_name": "foo", - "key_cnt": "1" - } - } - }] + "type": 2, + "start_time": 1666349231, + "end_time": 1666349231, + "trace_id": "trace.1", + "plugin_instance": "nest.2", + "records": [{ + "timestamp": 1666349231, + "record": { + "dummy": "dummy_0", + "powered_by": "fluent", + "data": { + "key_name": "foo", + "key_cnt": "1" + } + } + }] } ``` -### type - -The type defines at what stage the event is generated: - -- type=1: input record - - this is the unadulterated input record -- type=2: filtered record - - this is a record once it has been filtered. 
One record is generated per filter. -- type=3: pre-output record - - this is the record right before it is sent for output. - -Since this is a record generated by the manipulation of a record by a filter is has the type `2`. - -### start_time and end_time - -This records the start and end of an event, it is a bit different for each event type: - -- type 1: when the input is received, both the start and end time. -- type 2: the time when filtering is matched until it has finished processing. -- type 3: the time when the input is received and when it is finally slated for output. - -### trace_id - -This is a string composed of a prefix and a number which is incremented with each record received by the input during the Tap session. - -### plugin_instance - -This is the plugin instance name as it is generated by Fluent Bit at runtime. - -### plugin_alias - -If an alias is set this field will contain the alias set for a plugin. - -### records - -This is an array of all the records being sent. Since Fluent Bit handles records in chunks of multiple records and chunks are indivisible the same is done in the Tap output. Each record consists of its timestamp followed by the actual data which is a composite type of keys and values. +- `type`: Defines the stage the event is generated: + - `1`: Input record. This is the unadulterated input record. + - `2`: Filtered record. This is a record after it was filtered. One record is + generated per filter. + - `3`: Pre-output record. This is the record right before it's sent for output. + + This example is a record generated by the manipulation of a record by a filter so + it has the type `2`. +- `start_time` and `end_time`: Records the start and end of an event, and is + different for each event type: + - type 1: When the input is received, both the start and end time. + - type 2: The time when filtering is matched until it has finished processing. + - type 3: The time when the input is received and when it's finally slated for output. +- `trace_id`: A string composed of a prefix and a number which is incremented with + each record received by the input during the Tap session. +- `plugin_instance`: The plugin instance name as generated by Fluent Bit at runtime. +- `plugin_alias`: If an alias is set this field will contain the alias set for a plugin. +- `records`: An array of all the records being sent. Fluent Bit handles records in + chunks of multiple records and chunks are indivisible, the same is done in the Tap + output. Each record consists of its timestamp followed by the actual data which is + a composite type of keys and values. ## Dump Internals / Signal -When the service is running we can export [metrics](monitoring.md) to see the overall status of the data flow of the service. But there are other use cases where we would like to know the current status of the internals of the service, specifically to answer questions like _what's the current status of the internal buffers ?_ , the Dump Internals feature is the answer. +When the service is running, you can export [metrics](monitoring.md) to see the +overall status of the data flow of the service. There are other use cases where +you might need to know the current status of the service internals, like the current +status of the internal buffers. Dump Internals can help provide this information. -Fluent Bit v1.4 introduces the Dump Internals feature that can be triggered easily from the command line triggering the `CONT` Unix signal. 
+Fluent Bit v1.4 introduced the Dump Internals feature, which can be triggered from
+the command line by sending the `CONT` Unix signal.

 {% hint style="info" %}
-note: this feature is only available on Linux and BSD family operating systems
+This feature is only available on Linux and BSD operating systems.
 {% endhint %}

 ### Usage

 Run the following `kill` command to signal Fluent Bit:

-```text
+```shell
 kill -CONT `pidof fluent-bit`
 ```

-> The command `pidof` aims to lookup the Process ID of Fluent Bit. You can replace the
+The command `pidof` looks up the process ID of Fluent Bit.

-Fluent Bit will dump the following information to the standard output interface \(stdout\):
+Fluent Bit will dump the following information to the standard output interface
+(`stdout`):

 ```text
 [engine] caught signal (SIGCONT)
@@ -412,9 +419,9 @@ total chunks : 92
 └─ down : 57
 ```

-### Input Plugins Dump
+### Input plugins

-The dump provides insights for every input instance configured.
+The input plugins dump provides insights for every input instance configured.

 ### Status

@@ -422,46 +429,52 @@ Overall ingestion status of the plugin.

 | Entry | Sub-entry | Description |
 | :--- | :--- | :--- |
-| overlimit | | If the plugin has been configured with [Mem\_Buf\_Limit](backpressure.md), this entry will report if the plugin is over the limit or not at the moment of the dump. If it is overlimit, it will print `yes`, otherwise `no`. |
-| | mem\_size | Current memory size in use by the input plugin in-memory. |
-| | mem\_limit | Limit set by Mem\_Buf\_Limit. |
+| `overlimit` | | If the plugin has been configured with [`Mem_Buf_Limit`](backpressure.md), this entry reports whether the plugin is over the limit at the moment of the dump. Over the limit prints `yes`, otherwise `no`. |
+| | `mem_size` | Current memory size in use by the input plugin in-memory. |
+| | `mem_limit` | Limit set by `Mem_Buf_Limit`. |

 ### Tasks

-When an input plugin ingest data into the engine, a Chunk is created. A Chunk can contains multiple records. Upon flush time, the engine creates a Task that contains the routes for the Chunk associated in question.
+When an input plugin ingests data into the engine, a Chunk is created. A Chunk can
+contain multiple records. At flush time, the engine creates a Task that contains the
+routes for the associated Chunk.

 The Task dump describes the tasks associated to the input plugin:

 | Entry | Description |
 | :--- | :--- |
-| total\_tasks | Total number of active tasks associated to data generated by the input plugin. |
-| new | Number of tasks not assigned yet to an output plugin. Tasks are in `new` status for a very short period of time \(most of the time this value is very low or zero\). |
-| running | Number of active tasks being processed by output plugins. |
-| size | Amount of memory used by the Chunks being processed \(Total chunks size\). |
+| `total_tasks` | Total number of active tasks associated with data generated by the input plugin. |
+| `new` | Number of tasks not yet assigned to an output plugin. Tasks are in `new` status for a very short period of time. This value is normally very low or zero. |
+| `running` | Number of active tasks being processed by output plugins. |
+| `size` | Amount of memory used by the Chunks being processed (total chunk size). |

 ### Chunks

-The Chunks dump tells more details about all the chunks that the input plugin has generated and are still being processed. 
+The Chunks dump tells more details about all the chunks that the input plugin has +generated and are still being processed. -Depending of the buffering strategy and limits imposed by configuration, some Chunks might be `up` \(in memory\) or `down` \(filesystem\). +Depending of the buffering strategy and limits imposed by configuration, some Chunks +might be `up` (in memory) or `down` (filesystem). | Entry | Sub-entry | Description | | :--- | :--- | :--- | -| total\_chunks | | Total number of Chunks generated by the input plugin that are still being processed by the engine. | -| up\_chunks | | Total number of Chunks that are loaded in memory. | -| down\_chunks | | Total number of Chunks that are stored in the filesystem but not loaded in memory yet. | -| busy\_chunks | | Chunks marked as busy \(being flushed\) or locked. Busy Chunks are immutable and likely are ready to \(or being\) processed. | -| | size | Amount of bytes used by the Chunk. | -| | size err | Number of Chunks in an error state where it size could not be retrieved. | +| `total_chunks` | | Total number of Chunks generated by the input plugin that are still being processed by the engine. | +| `up_chunks` | | Total number of Chunks loaded in memory. | +| `down_chunks` | | Total number of Chunks stored in the filesystem but not loaded in memory yet. | +| `busy_chunks` | | Chunks marked as busy (being flushed) or locked. Busy Chunks are immutable and likely are ready to be or are being processed. | +| | `size` | Amount of bytes used by the Chunk. | +| | `size err` | Number of Chunks in an error state where its size couldn't be retrieved. | -### Storage Layer Dump +### Storage Layer -Fluent Bit relies on a custom storage layer interface designed for hybrid buffering. The `Storage Layer` entry contains a total summary of Chunks registered by Fluent Bit: +Fluent Bit relies on a custom storage layer interface designed for hybrid buffering. +The `Storage Layer` entry contains a total summary of Chunks registered by Fluent +Bit: | Entry | Sub-Entry | Description | | :--- | :--- | :--- | -| total chunks | | Total number of Chunks | -| mem chunks | | Total number of Chunks memory-based | -| fs chunks | | Total number of Chunks filesystem based | -| | up | Total number of filesystem chunks up in memory | -| | down | Total number of filesystem chunks down \(not loaded in memory\) | +| `total chunks` | | Total number of Chunks. | +| `mem chunks` | | Total number of Chunks memory-based. | +| `fs chunks` | | Total number of Chunks filesystem based. | +| | `up` | Total number of filesystem chunks up in memory. | +| | `down` | Total number of filesystem chunks down (not loaded in memory). | diff --git a/concepts/buffering.md b/concepts/buffering.md index b7f82affd..a45a4e963 100644 --- a/concepts/buffering.md +++ b/concepts/buffering.md @@ -1,22 +1,32 @@ --- -description: Performance and Data Safety +description: Performance and data safety --- # Buffering -When [Fluent Bit](https://fluentbit.io) processes data, it uses the system memory \(heap\) as a primary and temporary place to store the record logs before they get delivered, in this private memory area the records are processed. +When [Fluent Bit](https://fluentbit.io) processes data, it uses the system memory +(heap) as a primary and temporary place to store the record logs before they get +delivered. The records are processed in this private memory area. 
-Buffering refers to the ability to store the records somewhere, and while they are processed and delivered, still be able to store more. Buffering in memory is the fastest mechanism, but there are certain scenarios where it requires special strategies to deal with [backpressure](../administration/backpressure.md), data safety or reduce memory consumption by the service in constrained environments.
+Buffering is the ability to store the records, and continue storing incoming data
+while previous data is processed and delivered. Buffering in memory is the fastest
+mechanism, but there are scenarios requiring special strategies to deal with
+[backpressure](../administration/backpressure.md), data safety, or to reduce memory
+consumption by the service in constrained environments.

-{% hint style="info" %}
-Network failures or latency on third party service is pretty common, and on scenarios where we cannot deliver data fast enough as we receive new data to process, we likely will face backpressure.
+Network failures and latency in third-party services are common. When data can't be
+delivered fast enough and new data to process arrives, the system can face
+backpressure.

-Our buffering strategies are designed to solve problems associated with backpressure and general delivery failures.
-{% endhint %}
+Fluent Bit buffering strategies are designed to solve problems associated with
+backpressure and general delivery failures. Fluent Bit offers a primary buffering
+mechanism in memory and an optional secondary one using the file system. With
+this hybrid solution you can accommodate any use case safely and keep a high
+performance while processing your data.

-Fluent Bit as buffering strategies go, offers a primary buffering mechanism in **memory** and an optional secondary one using the **file system**. With this hybrid solution you can accommodate any use case safely and keep a high performance while processing your data.
-
-Both mechanisms are not mutually exclusive and when the data is ready to be processed or delivered it will always be **in memory**, while other data in the queue might be in the file system until is ready to be processed and moved up to memory.
-
-To learn more about the buffering configuration in Fluent Bit, please jump to the [Buffering & Storage](../administration/buffering-and-storage.md) section.
+These mechanisms aren't mutually exclusive. When data is ready to be processed or
+delivered, it's always in memory, while other data in the queue might be in
+the file system until it's ready to be processed and moved up to memory.
+To learn more about the buffering configuration in Fluent Bit, see
+[Buffering & Storage](../administration/buffering-and-storage.md).
diff --git a/concepts/data-pipeline/buffer.md b/concepts/data-pipeline/buffer.md
index c13f904e4..9d0f02785 100644
--- a/concepts/data-pipeline/buffer.md
+++ b/concepts/data-pipeline/buffer.md
@@ -4,14 +4,28 @@ description: Data processing with reliability

 # Buffer

-Previously defined in the [Buffering](../buffering.md) concept section, the `buffer` phase in the pipeline aims to provide a unified and persistent mechanism to store your data, either using the primary in-memory model or using the filesystem based mode.
+The [`buffer`](../buffering.md) phase in the pipeline aims to provide a unified and
+persistent mechanism to store your data, using the primary in-memory model or the
+file system-based mode.

-The `buffer` phase already contains the data in an immutable state, meaning, no other filter can be applied. 
+The `buffer` phase contains the data in an immutable state, meaning that no other +filter can be applied. - (1) (2) (2) (2) (2) (2) (2) (2) (1).png>) +```mermaid +graph LR + accTitle: Fluent Bit data pipeline + accDescr: A diagram of the Fluent Bit data pipeline, which includes input, a parser, a filter, a buffer, routing, and various outputs. + A[Input] --> B[Parser] + B --> C[Filter] + C --> D[Buffer] + D --> E((Routing)) + E --> F[Output 1] + E --> G[Output 2] + E --> H[Output 3] + style D stroke:darkred,stroke-width:2px; +``` -{% hint style="info" %} -Note that buffered data is not raw text, it's in Fluent Bit's internal binary representation. -{% endhint %} +Buffered data uses the Fluent Bit internal binary representation, which isn't raw text. -Fluent Bit offers a buffering mechanism in the file system that acts as a _backup system_ to avoid data loss in case of system failures. +Fluent Bit offers a buffering mechanism in the file system that acts as a backup +system to avoid data loss in case of system failures. diff --git a/concepts/data-pipeline/filter.md b/concepts/data-pipeline/filter.md index 2323d165e..c6c9eef81 100644 --- a/concepts/data-pipeline/filter.md +++ b/concepts/data-pipeline/filter.md @@ -1,17 +1,34 @@ --- -description: Modify, Enrich or Drop your records +description: Modify, enrich or drop your records --- # Filter -In production environments we want to have full control of the data we are collecting, filtering is an important feature that allows us to **alter** the data before delivering it to some destination. +In production environments you need full control of the data you're collecting. +Filtering lets you alter the collected data before delivering it to a destination. - (2) (2) (2) (2) (2) (2) (1).png>) +```mermaid +graph LR + accTitle: Fluent Bit data pipeline + accDescr: A diagram of the Fluent Bit data pipeline, which includes input, a parser, a filter, a buffer, routing, and various outputs. + A[Input] --> B[Parser] + B --> C[Filter] + C --> D[Buffer] + D --> E((Routing)) + E --> F[Output 1] + E --> G[Output 2] + E --> H[Output 3] + style C stroke:darkred,stroke-width:2px; +``` -Filtering is implemented through plugins, so each filter available could be used to match, exclude or enrich your logs with some specific metadata. +Filtering is implemented through plugins. Each available filter can be used to +match, exclude, or enrich your logs with specific metadata. -We support many filters, A common use case for filtering is Kubernetes deployments. Every Pod log needs to get the proper metadata associated +Fluent Bit support many filters. A common use case for filtering is Kubernetes +deployments. Every pod log needs the proper metadata associated with it. -Very similar to the input plugins, Filters run in an instance context, which has its own independent configuration. Configuration keys are often called **properties**. +Like input plugins, filters run in an instance context, which has its own independent +configuration. Configuration keys are often called _properties_. -For more details about the Filters available and their usage, please refer to the [Filters](https://docs.fluentbit.io/manual/pipeline/filters) section. +For more details about the Filters available and their usage, see +[Filters](https://docs.fluentbit.io/manual/pipeline/filters). 
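+
+As a minimal sketch of the instance-and-properties model, the following classic
+configuration defines one filter instance. It assumes the `grep` filter and a
+hypothetical `app.*` tag; the `Regex` property keeps only records whose `log` key
+starts with `ERROR`:
+
+```text
+[FILTER]
+    Name  grep
+    Match app.*
+    Regex log ^ERROR
+```
+
+Each additional `[FILTER]` section creates another independent instance with its own
+properties.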
diff --git a/concepts/data-pipeline/input.md b/concepts/data-pipeline/input.md index ca8500b0b..20f73ddcf 100644 --- a/concepts/data-pipeline/input.md +++ b/concepts/data-pipeline/input.md @@ -4,12 +4,29 @@ description: The way to gather data from your sources # Input -[Fluent Bit](http://fluentbit.io) provides different _Input Plugins_ to gather information from different sources, some of them just collect data from log files while others can gather metrics information from the operating system. There are many plugins for different needs. +[Fluent Bit](http://fluentbit.io) provides input plugins to gather information from +different sources. Some plugins collect data from log files, while others can +gather metrics information from the operating system. There are many plugins to suit +different needs. - (2) (2) (2) (2) (2) (2) (2) (1).png>) +```mermaid +graph LR + accTitle: Fluent Bit data pipeline + accDescr: A diagram of the Fluent Bit data pipeline, which includes input, a parser, a filter, a buffer, routing, and various outputs. + A[Input] --> B[Parser] + B --> C[Filter] + C --> D[Buffer] + D --> E((Routing)) + E --> F[Output 1] + E --> G[Output 2] + E --> H[Output 3] + style A stroke:darkred,stroke-width:2px; +``` -When an input plugin is loaded, an internal _instance_ is created. Every instance has its own and independent configuration. Configuration keys are often called **properties**. +When an input plugin loads, an internal _instance_ is created. Each instance has its +own independent configuration. Configuration keys are often called _properties_. -Every input plugin has its own documentation section where it's specified how it can be used and what properties are available. +Every input plugin has its own documentation section that specifies how to use it +and what properties are available. -For more details, please refer to the [Input Plugins](https://docs.fluentbit.io/manual/pipeline/inputs) section. +For more details, see [Input Plugins](https://docs.fluentbit.io/manual/pipeline/inputs). diff --git a/concepts/data-pipeline/output.md b/concepts/data-pipeline/output.md index 5a96f7ee6..d341a67f0 100644 --- a/concepts/data-pipeline/output.md +++ b/concepts/data-pipeline/output.md @@ -1,15 +1,33 @@ --- -description: 'Destinations for your data: databases, cloud services and more!' +description: Learn about destinations for your data, such as databases and cloud services. --- # Output -The output interface allows us to define destinations for the data. Common destinations are remote services, local file system or standard interface with others. Outputs are implemented as plugins and there are many available. +The output interface lets you define destinations for your data. Common destinations +are remote services, local file systems, or other standard interfaces. Outputs are +implemented as plugins. - (1).png>) +```mermaid +graph LR + accTitle: Fluent Bit data pipeline + accDescr: A diagram of the Fluent Bit data pipeline, which includes input, a parser, a filter, a buffer, routing, and various outputs. + A[Input] --> B[Parser] + B --> C[Filter] + C --> D[Buffer] + D --> E((Routing)) + E --> F[Output 1] + E --> G[Output 2] + E --> H[Output 3] + style F stroke:darkred,stroke-width:2px; + style G stroke:darkred,stroke-width:2px; + style H stroke:darkred,stroke-width:2px; +``` -When an output plugin is loaded, an internal _instance_ is created. Every instance has its own independent configuration. Configuration keys are often called **properties**. 
+When an output plugin is loaded, an internal _instance_ is created. Every instance +has its own independent configuration. Configuration keys are often called +_properties_. Every output plugin has its own documentation section specifying how it can be used and what properties are available. -For more details, please refer to the [Output Plugins](https://docs.fluentbit.io/manual/pipeline/outputs) section. +For more details, see [Output Plugins](https://docs.fluentbit.io/manual/pipeline/outputs). diff --git a/concepts/data-pipeline/parser.md b/concepts/data-pipeline/parser.md index 034376606..54b973991 100644 --- a/concepts/data-pipeline/parser.md +++ b/concepts/data-pipeline/parser.md @@ -1,20 +1,38 @@ --- -description: Convert Unstructured to Structured messages +description: Convert unstructured messages to structured messages --- # Parser -Dealing with raw strings or unstructured messages is a constant pain; having a structure is highly desired. Ideally we want to set a structure to the incoming data by the Input Plugins as soon as they are collected: - - (1) (1) (1) (2) (2) (2) (3) (3) (3) (3) (3) (1).png>) +Dealing with raw strings or unstructured messages is difficult. Having a structure +makes data more usable. Set a structure to the incoming data by using input +plugins as data is collected: + +```mermaid +graph LR + accTitle: Fluent Bit data pipeline + accDescr: A diagram of the Fluent Bit data pipeline, which includes input, a parser, a filter, a buffer, routing, and various outputs. + A[Input] --> B[Parser] + B --> C[Filter] + C --> D[Buffer] + D --> E((Routing)) + E --> F[Output 1] + E --> G[Output 2] + E --> H[Output 3] + style B stroke:darkred,stroke-width:2px; +``` -The Parser allows you to convert from unstructured to structured data. As a demonstrative example consider the following Apache (HTTP Server) log entry: +The parser converts unstructured data to structured data. As an example, consider the +following Apache (HTTP Server) log entry: -``` +```text 192.168.2.20 - - [28/Jul/2006:10:27:10 -0300] "GET /cgi-bin/try/ HTTP/1.0" 200 3395 ``` -The above log line is a raw string without format, ideally we would like to give it a structure that can be processed later easily. If the proper configuration is used, the log entry could be converted to: +This log line is a raw string without format. Structuring the log makes it easier +to process the data later. If the +[regular expression parser](pipeline/parsers/regular-expression) is used, the log +entry could be converted to: ```javascript { @@ -29,4 +47,6 @@ The above log line is a raw string without format, ideally we would like to give } ``` -Parsers are fully configurable and are independently and optionally handled by each input plugin, for more details please refer to the [Parsers](https://docs.fluentbit.io/manual/pipeline/parsers) section. +Parsers are fully configurable and are independently and optionally handled by each +input plugin. For more details, see +[Parsers](https://docs.fluentbit.io/manual/pipeline/parsers). diff --git a/concepts/data-pipeline/router.md b/concepts/data-pipeline/router.md index 0041c992e..4a6241630 100644 --- a/concepts/data-pipeline/router.md +++ b/concepts/data-pipeline/router.md @@ -4,22 +4,39 @@ description: Create flexible routing rules # Router -Routing is a core feature that allows to **route** your data through Filters and finally to one or multiple destinations. 
The router relies on the concept of [Tags](../key-concepts.md) and [Matching](../key-concepts.md) rules - - (1) (2) (2) (2) (2) (2) (2) (2) (1) (1).png>) +Routing is a core feature that lets you route your data through filters and then to +one or multiple destinations. The router relies on the concept of +[Tags](../key-concepts.md) and [Matching](../key-concepts.md) rules. + +```mermaid +graph LR + accTitle: Fluent Bit data pipeline + accDescr: A diagram of the Fluent Bit data pipeline, which includes input, a parser, a filter, a buffer, routing, and various outputs. + A[Input] --> B[Parser] + B --> C[Filter] + C --> D[Buffer] + D --> E((Routing)) + E --> F[Output 1] + E --> G[Output 2] + E --> H[Output 3] + style E stroke:darkred,stroke-width:2px; +``` There are two important concepts in Routing: -* Tag -* Match +- Tag +- Match -When the data is generated by the input plugins, it comes with a **Tag** (most of the time the Tag is configured manually), the Tag is a human-readable indicator that helps to identify the data source. +When data is generated by an input plugin, it comes with a `Tag`. A Tag is a +human-readable indicator that helps to identify the data source. Tags are usually +configured manually. -In order to define **where** the data should be routed, a **Match** rule must be specified in the output configuration. +To define where to route data, specify a `Match` rule in the output configuration. -Consider the following configuration example that aims to deliver CPU metrics to an Elasticsearch database and Memory metrics to the standard output interface: +Consider the following configuration example that delivers `CPU` metrics to an +Elasticsearch database and Memory (`mem`) metrics to the standard output interface: -``` +```text [INPUT] Name cpu Tag my_cpu @@ -37,15 +54,15 @@ Consider the following configuration example that aims to deliver CPU metrics to Match my_mem ``` -> Note: the above is a simple example demonstrating how Routing is configured. - -Routing works automatically reading the Input Tags and the Output Match rules. If some data has a Tag that doesn't match upon routing time, the data is deleted. +Routing reads the `Input` `Tag` and the `Output` `Match` rules. If data has a `Tag` +that doesn't match at routing time, the data is deleted. ## Routing with Wildcard -Routing is flexible enough to support _wildcard_ in the **Match** pattern. The below example defines a common destination for both sources of data: +Routing is flexible enough to support wildcards in the `Match` pattern. The following +example defines a common destination for both sources of data: -``` +```text [INPUT] Name cpu Tag my_cpu @@ -59,14 +76,15 @@ Routing is flexible enough to support _wildcard_ in the **Match** pattern. The b Match my_* ``` -The match rule is set to **my\_\*** which means it will match any Tag that starts with **my\_**. +The match rule is set to `my_*`, which matches any Tag starting with `my_*`. ## Routing with Regex -Routing also provides support for _regex_ with the **Match_Regex** pattern, allowing for more complex and precise matching criteria. -The following example demonstrates how to route data from sources based on a regular expression: +Routing also provides support for regular expressions with the `Match_Regex` pattern, +allowing for more complex and precise matching criteria. 
The following example +demonstrates how to route data from sources based on a regular expression: -``` +```text [INPUT] Name temperature_sensor Tag temp_sensor_A @@ -77,8 +95,10 @@ The following example demonstrates how to route data from sources based on a reg [OUTPUT] Name stdout - Match_regex .*_sensor_[AB] + Match_regex .*_sensor_[AB] ``` -In this configuration, the **Match_regex** rule is set to `.*_sensor_[AB]`. This regular expression will match any Tag that ends with "_sensor_A" or "_sensor_B", regardless of what precedes it. -This approach provides a more flexible and powerful way to handle different source tags with a single routing rule. +In this configuration, the `Match_regex` rule is set to `.*_sensor_[AB]`. This +regular expression matches any `Tag` that ends with `_sensor_A` or `_sensor_B`, +regardless of what precedes it. This approach provides a more flexible and powerful +way to handle different source tags with a single routing rule. diff --git a/concepts/key-concepts.md b/concepts/key-concepts.md index c3b70801a..fc9841f30 100644 --- a/concepts/key-concepts.md +++ b/concepts/key-concepts.md @@ -1,25 +1,28 @@ --- -description: >- - There are a few key concepts that are really important to understand how - Fluent Bit operates. +description: Learn these key concepts to understand how Fluent Bit operates. --- -# Key Concepts +# Key concepts -Before diving into [Fluent Bit](https://fluentbit.io) it’s good to get acquainted with some of the key concepts of the service. This document provides a gentle introduction to those concepts and common [Fluent Bit](https://fluentbit.io) terminology. We’ve provided a list below of all the terms we’ll cover, but we recommend reading this document from start to finish to gain a more general understanding of our log and stream processor. +Before diving into [Fluent Bit](https://fluentbit.io) you might want to get acquainted +with some of the key concepts of the service. This document provides an +introduction to those concepts and common [Fluent Bit](https://fluentbit.io) +terminology. Reading this document will help you gain a more general understanding of the +following topics: -* Event or Record -* Filtering -* Tag -* Timestamp -* Match -* Structured Message +- Event or Record +- Filtering +- Tag +- Timestamp +- Match +- Structured Message ## Event or Record -Every incoming piece of data that belongs to a log or a metric that is retrieved by Fluent Bit is considered an Event or a Record. +Every incoming piece of data that belongs to a log or a metric that's retrieved by +Fluent Bit is considered an _Event_ or a _Record_. -As an example consider the following content of a Syslog file: +As an example, consider the following content of a Syslog file: ```text Jan 18 12:52:16 flb systemd[2222]: Starting GNOME Terminal Server @@ -28,30 +31,31 @@ Jan 18 12:52:16 flb systemd[2222]: Started GNOME Terminal Server. Jan 18 12:52:16 flb gsd-media-keys[2640]: # watch_fast: "/org/gnome/terminal/legacy/" (establishing: 0, active: 0) ``` -It contains four lines and all of them represents **four** independent Events. +It contains four lines that represent four independent Events. 
-Internally an Event is comprised of: +An Event is comprised of: -* timestamp -* key/value metadata (since v2.1.0) -* payload +- timestamp +- key/value metadata (v2.1.0 and greater) +- payload ### Event format -The Fluent Bit wire protocol represents an Event as a 2-element array +The Fluent Bit wire protocol represents an Event as a two-element array with a nested array as the first element: -```javascript +```javascript copy [[TIMESTAMP, METADATA], MESSAGE] ``` where -* TIMESTAMP is a timestamp in seconds as an integer or floating point value (not a string); -* METADATA is a possibly-empty object containing event metadata; and -* MESSAGE is an object containing the event body. +- _`TIMESTAMP`_ is a timestamp in seconds as an integer or floating point value + (not a string). +- _`METADATA`_ is an object containing event metadata, and might be empty. +- _`MESSAGE`_ is an object containing the event body. -Fluent Bit versions prior to v2.1.0 instead used: +Fluent Bit versions prior to v2.1.0 used: ```javascript [TIMESTAMP, MESSAGE] @@ -62,74 +66,79 @@ streams. ## Filtering -In some cases it is required to perform modifications on the Events content, the process to alter, enrich or drop Events is called Filtering. +You might need to perform modifications on an Event's content. The process to alter, +append to, or drop Events is called [_filtering_](data-pipeline/filter.md). -There are many use cases when Filtering is required like: +Use filtering to: -* Append specific information to the Event like an IP address or metadata. -* Select a specific piece of the Event content. -* Drop Events that matches certain pattern. +- Append specific information to the Event like an IP address or metadata. +- Select a specific piece of the Event content. +- Drop Events that match a certain pattern. ## Tag -Every Event that gets into Fluent Bit gets assigned a Tag. This tag is an internal string that is used in a later stage by the Router to decide which Filter or Output phase it must go through. +Every Event ingested by Fluent Bit is assigned a Tag. This tag is an internal string +used in a later stage by the Router to decide which Filter or +[Output](data-pipeline/output.md) phase it must go through. -Most of the tags are assigned manually in the configuration. If a tag is not specified, Fluent Bit will assign the name of the Input plugin instance from where that Event was generated from. +Most tags are assigned manually in the configuration. If a tag isn't specified, +Fluent Bit assigns the name of the [Input](data-pipeline/input.md) plugin +instance where that Event was generated from. {% hint style="info" %} -The only input plugin that **does NOT** assign tags is [Forward](../pipeline/inputs/forward.md) input. This plugin speaks the Fluentd wire protocol called Forward where every Event already comes with a Tag associated. Fluent Bit will always use the incoming Tag set by the client. +The [Forward](../pipeline/inputs/forward.md) input plugin doesn't assign tags. This +plugin speaks the Fluentd wire protocol called Forward where every Event already +comes with a Tag associated. Fluent Bit will always use the incoming Tag set by the +client. {% endhint %} -A Tagged record must always have a Matching rule. To learn more about Tags and Matches check the [Routing](data-pipeline/router.md) section. +A tagged record must always have a Matching rule. To learn more about Tags and +Matches, see [Routing](data-pipeline/router.md). ## Timestamp -The Timestamp represents the _time_ when an Event was created. 
Every Event contains a Timestamp associated. The Timestamp is a numeric fractional integer in the format: +The timestamp represents the time an Event was created. Every Event contains an +associated timestamps. All events have timestamps, and they're set by the input plugin or +discovered through a data parsing process. + +The timestamp is a numeric fractional integer in the format: ```javascript SECONDS.NANOSECONDS ``` -### Seconds - -It is the number of seconds that have elapsed since the _Unix epoch._ - -### Nanoseconds +where: -Fractional second or one thousand-millionth of a second. - -{% hint style="info" %} -A timestamp always exists, either set by the Input plugin or discovered through a data parsing process. -{% endhint %} +- `_SECONDS_` is the number of seconds that have elapsed since the Unix epoch. +- `_NANOSECONDS_` is a fractional second or one thousand-millionth of a second. ## Match -Fluent Bit allows to deliver your collected and processed Events to one or multiple destinations, this is done through a routing phase. A Match represent a simple rule to select Events where it Tags matches a defined rule. +Fluent Bit lets you route your collected and processed Events to one or multiple +destinations. A _Match_ represents a rule to select Events +where a Tag matches a defined rule. -To learn more about Tags and Matches check the [Routing](data-pipeline/router.md) section. +To learn more about Tags and Matches, see [Routing](data-pipeline/router.md). -## Structured Messages +## Structured messages -Source events can have or not have a structure. A structure defines a set of _keys_ and _values_ inside the Event message. As an example consider the following two messages: +Source events can have a structure. A structure defines a set of `keys` and `values` +inside the Event message to implement faster operations on data modifications. +Fluent Bit treats every Event message as a structured message. -### No structured message +Consider the following two messages: -```javascript -"Project Fluent Bit created on 1398289291" -``` - -### Structured Message +- No structured message -```javascript -{"project": "Fluent Bit", "created": 1398289291} -``` + ```javascript + "Project Fluent Bit created on 1398289291" + ``` -At a low level both are just an array of bytes, but the Structured message defines _keys_ and _values_, having a structure helps to implement faster operations on data modifications. +- With a structured message -{% hint style="info" %} -Fluent Bit **always** handles every Event message as a structured message. -For performance reasons, we use a binary serialization data format called [MessagePack](https://msgpack.org/). - -Consider [MessagePack](https://msgpack.org/) as a binary version of JSON on steroids. -{% endhint %} + ```javascript + {"project": "Fluent Bit", "created": 1398289291} + ``` +For performance reasons, Fluent Bit uses a binary serialization data format called +[MessagePack](https://msgpack.org/). 
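+
+Tying this back to the Event format described earlier, the structured message above
+would travel through the pipeline as a two-element array before being serialized to
+MessagePack. This is a sketch that reuses the `created` value as the timestamp and
+assumes empty metadata:
+
+```javascript
+[[1398289291, {}], {"project": "Fluent Bit", "created": 1398289291}]
+```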
diff --git a/development/golang-output-plugins.md b/development/golang-output-plugins.md index 73c5b8204..3113532aa 100644 --- a/development/golang-output-plugins.md +++ b/development/golang-output-plugins.md @@ -21,7 +21,7 @@ Usage: fluent-bit [OPTION] Available Options -c --config=FILE specify an optional configuration file -d, --daemon run Fluent Bit in background mode - -f, --flush=SECONDS flush timeout in seconds (default: 5) + -f, --flush=SECONDS flush timeout in seconds (default: 1) -i, --input=INPUT set an input -m, --match=MATCH set plugin match, same as '-p match=abc' -o, --output=OUTPUT set an output diff --git a/development/wasm-filter-plugins.md b/development/wasm-filter-plugins.md index b47207a1d..e8cfb376e 100644 --- a/development/wasm-filter-plugins.md +++ b/development/wasm-filter-plugins.md @@ -170,8 +170,8 @@ These examples can be applied in our demo and can serve as an ideal starting poi ### Optimize execution of WASM programs To optimize WASM program execution, there is the option of using `flb-wamrc`. -`flb-wamrc` will reduce runtime footprint and to be best perforemance for filtering operations. -This tool will be built when `-DFLB_WAMRC=On` cmake option is specififed and llvm infrastructure is installed on the building box. +`flb-wamrc` will reduce runtime footprint and to be best performance for filtering operations. +This tool will be built when `-DFLB_WAMRC=On` cmake option is specified and llvm infrastructure is installed on the building box. ```shell $ flb-wamrc -o /path/to/built_wasm.aot /path/to/built_wasm.wasm diff --git a/imgs/processor_opentelemetry_envelope.png b/imgs/processor_opentelemetry_envelope.png new file mode 100644 index 000000000..d44920a14 Binary files /dev/null and b/imgs/processor_opentelemetry_envelope.png differ diff --git a/installation/amazon-ec2.md b/installation/amazon-ec2.md index f35f0aa57..25c5a70ee 100644 --- a/installation/amazon-ec2.md +++ b/installation/amazon-ec2.md @@ -1,4 +1,4 @@ # Amazon EC2 -Learn how to [install Fluent Bit and the AWS output plugins on Amazon Linux 2 via AWS Systems Manager](https://github.com/aws/aws-for-fluent-bit/tree/master/examples/fluent-bit/systems-manager-ec2). - +Learn how to install Fluent Bit and the AWS output plugins on Amazon Linux 2 using +[AWS Systems Manager](https://github.com/aws/aws-for-fluent-bit/tree/master/examples/fluent-bit/systems-manager-ec2). diff --git a/installation/aws-container.md b/installation/aws-container.md index 89af22448..29b9a9363 100644 --- a/installation/aws-container.md +++ b/installation/aws-container.md @@ -1,45 +1,57 @@ # Containers on AWS -AWS maintains a distribution of Fluent Bit combining the latest official release with a set of Go Plugins for sending logs to AWS services. AWS and Fluent Bit are working together to rewrite their plugins for inclusion in the official Fluent Bit distribution. +AWS maintains a distribution of Fluent Bit that combines the latest official release with +a set of Go Plugins for sending logs to AWS services. AWS and Fluent Bit are working +together to rewrite their plugins for inclusion in the official Fluent Bit +distribution. 
## Plugins -Currently, the [AWS for Fluent Bit](https://github.com/aws/aws-for-fluent-bit) image contains Go Plugins for: +The [AWS for Fluent Bit](https://github.com/aws/aws-for-fluent-bit) image contains Go +Plugins for: -* [Amazon CloudWatch Logs](https://github.com/aws/amazon-cloudwatch-logs-for-fluent-bit) -* [Amazon Kinesis Firehose](https://github.com/aws/amazon-kinesis-firehose-for-fluent-bit) -* [Amazon Kinesis Streams](https://github.com/aws/amazon-kinesis-streams-for-fluent-bit) +- Amazon CloudWatch as `cloudwatch_logs`. See the + [Fluent Bit docs](https://docs.fluentbit.io/manual/pipeline/outputs/cloudwatch) or the + [Plugin repository](https://github.com/aws/amazon-cloudwatch-logs-for-fluent-bit). +- Amazon Kinesis Data Firehose as `kinesis_firehose`. See the + [Fluent Bit docs](https://docs.fluentbit.io/manual/pipeline/outputs/firehose) or the + [Plugin repository](https://github.com/aws/amazon-kinesis-firehose-for-fluent-bit). +- Amazon Kinesis Data Streams as `kinesis_streams`. See the + [Fluent Bit docs](https://docs.fluentbit.io/manual/pipeline/outputs/kinesis) or the + [Plugin repository](https://github.com/aws/amazon-kinesis-streams-for-fluent-bit). -Fluent Bit includes Amazon CloudWatch Logs plugin named `cloudwatch_logs`, Amazon Kinesis Firehose plugin named `kinesis_firehose` and Amazon Kinesis Data Streams plugin named `kinesis_streams` which are higher performance than Go plugins. +These plugins are higher performance than Go plugins. -* [Amazon CloudWatch](https://docs.fluentbit.io/manual/pipeline/outputs/cloudwatch) -* [Amazon Kinesis Data Firehose](https://docs.fluentbit.io/manual/pipeline/outputs/firehose) -* [Amazon Kinesis Data Streams](https://docs.fluentbit.io/manual/pipeline/outputs/kinesis) +Also, Fluent Bit includes an S3 output plugin named `s3`. -Also, Fluent Bit includes S3 output plugin named `s3`. - -* [Amazon S3](https://docs.fluentbit.io/manual/pipeline/outputs/s3) +- [Amazon S3](https://docs.fluentbit.io/manual/pipeline/outputs/s3) ## Versions and Regional Repositories -AWS vends their container image via [Docker Hub](https://hub.docker.com/r/amazon/aws-for-fluent-bit), and a set of highly available regional Amazon ECR repositories. For more information, see the [AWS for Fluent Bit GitHub repo](https://github.com/aws/aws-for-fluent-bit#public-images). +AWS vends their container image using +[Docker Hub](https://hub.docker.com/r/amazon/aws-for-fluent-bit), and a set of highly +available regional Amazon ECR repositories. For more information, see the +[AWS for Fluent Bit GitHub repository](https://github.com/aws/aws-for-fluent-bit#public-images). -The AWS for Fluent Bit image uses a custom versioning scheme because it contains multiple projects. To see what each release contains, check out the [release notes on GitHub](https://github.com/aws/aws-for-fluent-bit/releases). +The AWS for Fluent Bit image uses a custom versioning scheme because it contains +multiple projects. To see what each release contains, see the [release notes on +GitHub](https://github.com/aws/aws-for-fluent-bit/releases). ## SSM Public Parameters -AWS vends SSM Public Parameters with the regional repository link for each image. These parameters can be queried by any AWS account. +AWS vends SSM public parameters with the regional repository link for each image. +These parameters can be queried by any AWS account. 
To see a list of available version tags in a given region, run the following command: -```text +```bash aws ssm get-parameters-by-path --region eu-central-1 --path /aws/service/aws-for-fluent-bit/ --query 'Parameters[*].Name' ``` To see the ECR repository URI for a given image tag in a given region, run the following: -```text -$ aws ssm get-parameter --region ap-northeast-1 --name /aws/service/aws-for-fluent-bit/2.0.0 +```bash +aws ssm get-parameter --region ap-northeast-1 --name /aws/service/aws-for-fluent-bit/2.0.0 ``` You can use these SSM public parameters as parameters in your CloudFormation templates: @@ -51,4 +63,3 @@ Parameters: Type: AWS::SSM::Parameter::Value<String> Default: /aws/service/aws-for-fluent-bit/latest ``` - diff --git a/installation/buildroot-embedded-linux.md b/installation/buildroot-embedded-linux.md new file mode 100644 index 000000000..a457f1c63 --- /dev/null +++ b/installation/buildroot-embedded-linux.md @@ -0,0 +1,27 @@ +# Buildroot / Embedded Linux + +Install Fluent Bit in your embedded Linux system. + +## Install + +To install, select Fluent Bit in your `defconfig`. +See the `Config.in` file for all configuration options. + +```text +BR2_PACKAGE_FLUENT_BIT=y +``` + +## Run + +The default configuration file is written to: + +```text +/etc/fluent-bit/fluent-bit.conf +``` + +Fluent Bit is started by the `S99fluent-bit` script. + +## Support + +All configurations with a toolchain that supports threads and dynamic library +linking are supported. diff --git a/installation/docker.md b/installation/docker.md index c2c0a6025..b4db66211 100644 --- a/installation/docker.md +++ b/installation/docker.md @@ -3,20 +3,81 @@ Fluent Bit container images are available on Docker Hub ready for production usage. Current available images can be deployed in multiple architectures. 
-## Quick Start +## Start Docker -Get started by simply typing the following command: +Use the following command to start Docker with Fluent Bit: ```shell docker run -ti cr.fluentbit.io/fluent/fluent-bit ``` -## Tags and Versions +## Tags and versions -The following table describes the Linux container tags that are available on Docker Hub [fluent/fluent-bit](https://hub.docker.com/r/fluent/fluent-bit/) repository: +The following table describes the Linux container tags that are available on Docker +Hub [fluent/fluent-bit](https://hub.docker.com/r/fluent/fluent-bit/) repository: | Tag(s) | Manifest Architectures | Description | | ------------ | ------------------------- | -------------------------------------------------------------- | +| 4.0.0-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 4.0.0 | x86_64, arm64v8, arm32v7, s390x | Release [v4.0.0](https://fluentbit.io/announcements/v4.0.0/) | +| 3.2.10-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.10 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.10](https://fluentbit.io/announcements/v3.2.10/) | +| 3.2.9-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.9 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.9](https://fluentbit.io/announcements/v3.2.9/) | +| 3.2.8-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.8 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.8](https://fluentbit.io/announcements/v3.2.8/) | +| 3.2.7-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.7 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.7](https://fluentbit.io/announcements/v3.2.7/) | +| 3.2.6-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.6 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.6](https://fluentbit.io/announcements/v3.2.6/) | +| 3.2.5-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.5 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.5](https://fluentbit.io/announcements/v3.2.5/) | +| 3.2.4-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.4 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.4](https://fluentbit.io/announcements/v3.2.4/) | +| 3.2.3-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.3 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.3](https://fluentbit.io/announcements/v3.2.3/) | +| 3.2.2-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.2 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.2](https://fluentbit.io/announcements/v3.2.2/) | +| 3.2.1-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.2.1 | x86_64, arm64v8, arm32v7, s390x | Release [v3.2.1](https://fluentbit.io/announcements/v3.2.1/) | +| 3.1.10-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.10 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.10](https://fluentbit.io/announcements/v3.1.10/) | +| 3.1.9-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.9 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.9](https://fluentbit.io/announcements/v3.1.9/) | +| 3.1.8-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.8 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.8](https://fluentbit.io/announcements/v3.1.8/) | +| 3.1.7-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.7 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.7](https://fluentbit.io/announcements/v3.1.7/) | +| 3.1.6-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.6 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.6](https://fluentbit.io/announcements/v3.1.6/) | +| 3.1.5-debug | 
x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.5 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.5](https://fluentbit.io/announcements/v3.1.5/) | +| 3.1.4-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.4 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.4](https://fluentbit.io/announcements/v3.1.4/) | +| 3.1.3-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.3 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.3](https://fluentbit.io/announcements/v3.1.3/) | +| 3.1.2-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.2 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.2](https://fluentbit.io/announcements/v3.1.2/) | +| 3.1.1-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.1 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.1](https://fluentbit.io/announcements/v3.1.1/) | +| 3.1.0-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.1.0 | x86_64, arm64v8, arm32v7, s390x | Release [v3.1.0](https://fluentbit.io/announcements/v3.1.0/) | +| 3.0.7-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.0.7 | x86_64, arm64v8, arm32v7, s390x | Release [v3.0.7](https://fluentbit.io/announcements/v3.0.7/) | +| 3.0.6-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.0.6 | x86_64, arm64v8, arm32v7, s390x | Release [v3.0.6](https://fluentbit.io/announcements/v3.0.6/) | +| 3.0.5-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.0.5 | x86_64, arm64v8, arm32v7, s390x | Release [v3.0.5](https://fluentbit.io/announcements/v3.0.5/) | +| 3.0.4-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.0.4 | x86_64, arm64v8, arm32v7, s390x | Release [v3.0.4](https://fluentbit.io/announcements/v3.0.4/) | +| 3.0.3-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.0.3 | x86_64, arm64v8, arm32v7, s390x | Release [v3.0.3](https://fluentbit.io/announcements/v3.0.3/) | +| 3.0.2-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.0.2 | x86_64, arm64v8, arm32v7, s390x | Release [v3.0.2](https://fluentbit.io/announcements/v3.0.2/) | +| 3.0.1-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.0.1 | x86_64, arm64v8, arm32v7, s390x | Release [v3.0.1](https://fluentbit.io/announcements/v3.0.1/) | +| 3.0.0-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | +| 3.0.0 | x86_64, arm64v8, arm32v7, s390x | Release [v3.0.0](https://fluentbit.io/announcements/v3.0.0/) | | 2.2.2-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | | 2.2.2 | x86_64, arm64v8, arm32v7, s390x | Release [v2.2.2](https://fluentbit.io/announcements/v2.2.2/) | | 2.2.1-debug | x86_64, arm64v8, arm32v7, s390x | Debug images | @@ -41,69 +102,77 @@ The following table describes the Linux container tags that are available on Doc | 2.1.3-debug | x86_64, arm64v8, arm32v7 | Debug images | | 2.1.2 | x86_64, arm64v8, arm32v7 | Release [v2.1.2](https://fluentbit.io/announcements/v2.1.2/) | | 2.1.2-debug | x86_64, arm64v8, arm32v7 | Debug images | -| 2.1.1 | x86\_64, arm64v8, arm32v7 | Release [v2.1.1](https://fluentbit.io/announcements/v2.1.1/) | -| 2.1.1-debug | x86\_64, arm64v8, arm32v7 | v2.1.x releases (production + debug) | -| 2.1.0 | x86\_64, arm64v8, arm32v7 | Release [v2.1.0](https://fluentbit.io/announcements/v2.1.0/) | -| 2.1.0-debug | x86\_64, arm64v8, arm32v7 | v2.1.x releases (production + debug) | -| 2.0.11 | x86\_64, arm64v8, arm32v7 | Release [v2.0.11](https://fluentbit.io/announcements/v2.0.11/) | -| 2.0.11-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.10 | 
x86\_64, arm64v8, arm32v7 | Release [v2.0.10](https://fluentbit.io/announcements/v2.0.10/) | -| 2.0.10-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.9 | x86\_64, arm64v8, arm32v7 | Release [v2.0.9](https://fluentbit.io/announcements/v2.0.9/) | -| 2.0.9-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.8 | x86\_64, arm64v8, arm32v7 | Release [v2.0.8](https://fluentbit.io/announcements/v2.0.8/) | -| 2.0.8-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.6 | x86\_64, arm64v8, arm32v7 | Release [v2.0.6](https://fluentbit.io/announcements/v2.0.6/) | -| 2.0.6-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.5 | x86\_64, arm64v8, arm32v7 | Release [v2.0.5](https://fluentbit.io/announcements/v2.0.5/) | -| 2.0.5-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.4 | x86\_64, arm64v8, arm32v7 | Release [v2.0.4](https://fluentbit.io/announcements/v2.0.4/) | -| 2.0.4-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.3 | x86\_64, arm64v8, arm32v7 | Release [v2.0.3](https://fluentbit.io/announcements/v2.0.3/) | -| 2.0.3-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.2 | x86\_64, arm64v8, arm32v7 | Release [v2.0.2](https://fluentbit.io/announcements/v2.0.2/) | -| 2.0.2-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.1 | x86\_64, arm64v8, arm32v7 | Release [v2.0.1](https://fluentbit.io/announcements/v2.0.1/) | -| 2.0.1-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 2.0.0 | x86\_64, arm64v8, arm32v7 | Release [v2.0.0](https://fluentbit.io/announcements/v2.0.0/) | -| 2.0.0-debug | x86\_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | -| 1.9.9 | x86\_64, arm64v8, arm32v7 | Release [v1.9.9](https://fluentbit.io/announcements/v1.9.9/) | -| 1.9.9-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.8 | x86\_64, arm64v8, arm32v7 | Release [v1.9.8](https://fluentbit.io/announcements/v1.9.8/) | -| 1.9.8-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.7 | x86\_64, arm64v8, arm32v7 | Release [v1.9.7](https://fluentbit.io/announcements/v1.9.7/) | -| 1.9.7-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.6 | x86\_64, arm64v8, arm32v7 | Release [v1.9.6](https://fluentbit.io/announcements/v1.9.6/) | -| 1.9.6-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.5 | x86\_64, arm64v8, arm32v7 | Release [v1.9.5](https://fluentbit.io/announcements/v1.9.5/) | -| 1.9.5-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.4 | x86\_64, arm64v8, arm32v7 | Release [v1.9.4](https://fluentbit.io/announcements/v1.9.4/) | -| 1.9.4-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.3 | x86\_64, arm64v8, arm32v7 | Release [v1.9.3](https://fluentbit.io/announcements/v1.9.3/) | -| 1.9.3-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.2 | x86\_64, arm64v8, arm32v7 | Release [v1.9.2](https://fluentbit.io/announcements/v1.9.2/) | -| 1.9.2-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.1 | x86\_64, arm64v8, arm32v7 | Release [v1.9.1](https://fluentbit.io/announcements/v1.9.1/) | -| 1.9.1-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | -| 1.9.0 | x86\_64, arm64v8, 
arm32v7 | Release [v1.9.0](https://fluentbit.io/announcements/v1.9.0/) | -| 1.9.0-debug | x86\_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | - -It is strongly suggested that you always use the latest image of Fluent Bit. - -Windows container images are provided from v2.0.6 for Windows Server 2019 and Windows Server 2022. -These can be found as tags on the same Docker Hub registry above. - -## Multi Architecture Images - -Our production stable images are based on [Distroless](https://github.com/GoogleContainerTools/distroless) focusing on security containing just the Fluent Bit binary and minimal system libraries and basic configuration. -We also provide **debug** images for all architectures (from 1.9.0+) which contain a full (Debian) shell and package manager that can be used to troubleshoot or for testing purposes. - -From a deployment perspective, there is no need to specify an architecture, the container client tool that pulls the image gets the proper layer for the running architecture. +| 2.1.1 | x86_64, arm64v8, arm32v7 | Release [v2.1.1](https://fluentbit.io/announcements/v2.1.1/) | +| 2.1.1-debug | x86_64, arm64v8, arm32v7 | v2.1.x releases (production + debug) | +| 2.1.0 | x86_64, arm64v8, arm32v7 | Release [v2.1.0](https://fluentbit.io/announcements/v2.1.0/) | +| 2.1.0-debug | x86_64, arm64v8, arm32v7 | v2.1.x releases (production + debug) | +| 2.0.11 | x86_64, arm64v8, arm32v7 | Release [v2.0.11](https://fluentbit.io/announcements/v2.0.11/) | +| 2.0.11-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.10 | x86_64, arm64v8, arm32v7 | Release [v2.0.10](https://fluentbit.io/announcements/v2.0.10/) | +| 2.0.10-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.9 | x86_64, arm64v8, arm32v7 | Release [v2.0.9](https://fluentbit.io/announcements/v2.0.9/) | +| 2.0.9-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.8 | x86_64, arm64v8, arm32v7 | Release [v2.0.8](https://fluentbit.io/announcements/v2.0.8/) | +| 2.0.8-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.6 | x86_64, arm64v8, arm32v7 | Release [v2.0.6](https://fluentbit.io/announcements/v2.0.6/) | +| 2.0.6-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.5 | x86_64, arm64v8, arm32v7 | Release [v2.0.5](https://fluentbit.io/announcements/v2.0.5/) | +| 2.0.5-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.4 | x86_64, arm64v8, arm32v7 | Release [v2.0.4](https://fluentbit.io/announcements/v2.0.4/) | +| 2.0.4-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.3 | x86_64, arm64v8, arm32v7 | Release [v2.0.3](https://fluentbit.io/announcements/v2.0.3/) | +| 2.0.3-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.2 | x86_64, arm64v8, arm32v7 | Release [v2.0.2](https://fluentbit.io/announcements/v2.0.2/) | +| 2.0.2-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.1 | x86_64, arm64v8, arm32v7 | Release [v2.0.1](https://fluentbit.io/announcements/v2.0.1/) | +| 2.0.1-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 2.0.0 | x86_64, arm64v8, arm32v7 | Release [v2.0.0](https://fluentbit.io/announcements/v2.0.0/) | +| 2.0.0-debug | x86_64, arm64v8, arm32v7 | v2.0.x releases (production + debug) | +| 1.9.9 | x86_64, arm64v8, arm32v7 | Release [v1.9.9](https://fluentbit.io/announcements/v1.9.9/) | +| 1.9.9-debug | x86_64, 
arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.8 | x86_64, arm64v8, arm32v7 | Release [v1.9.8](https://fluentbit.io/announcements/v1.9.8/) | +| 1.9.8-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.7 | x86_64, arm64v8, arm32v7 | Release [v1.9.7](https://fluentbit.io/announcements/v1.9.7/) | +| 1.9.7-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.6 | x86_64, arm64v8, arm32v7 | Release [v1.9.6](https://fluentbit.io/announcements/v1.9.6/) | +| 1.9.6-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.5 | x86_64, arm64v8, arm32v7 | Release [v1.9.5](https://fluentbit.io/announcements/v1.9.5/) | +| 1.9.5-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.4 | x86_64, arm64v8, arm32v7 | Release [v1.9.4](https://fluentbit.io/announcements/v1.9.4/) | +| 1.9.4-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.3 | x86_64, arm64v8, arm32v7 | Release [v1.9.3](https://fluentbit.io/announcements/v1.9.3/) | +| 1.9.3-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.2 | x86_64, arm64v8, arm32v7 | Release [v1.9.2](https://fluentbit.io/announcements/v1.9.2/) | +| 1.9.2-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.1 | x86_64, arm64v8, arm32v7 | Release [v1.9.1](https://fluentbit.io/announcements/v1.9.1/) | +| 1.9.1-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | +| 1.9.0 | x86_64, arm64v8, arm32v7 | Release [v1.9.0](https://fluentbit.io/announcements/v1.9.0/) | +| 1.9.0-debug | x86_64, arm64v8, arm32v7 | v1.9.x releases (production + debug) | + +It's strongly suggested that you always use the latest image of Fluent Bit. + +Container images for Windows Server 2019 and Windows Server 2022 are provided for +v2.0.6 and later. These can be found as tags on the same Docker Hub registry. + +## Multi-architecture images + +Fluent Bit production stable images are based on +[Distroless](https://github.com/GoogleContainerTools/distroless). Focusing on +security, these images contain only the Fluent Bit binary and minimal system +libraries and basic configuration. + +Debug images are available for all architectures (for 1.9.0 and later), and contain +a full Debian shell and package manager that can be used to troubleshoot or for +testing purposes. + +From a deployment perspective, there's no need to specify an architecture. The +container client tool that pulls the image gets the proper layer for the running +architecture. ## Verify signed container images -1.9 and 2.0 container images are signed using Cosign/Sigstore. -These signatures can be verified using `cosign` ([install guide](https://docs.sigstore.dev/cosign/installation/)): +Version 1.9 and 2.0 container images are signed using Cosign/Sigstore. 
+Verify these signatures using `cosign` ([install guide](https://docs.sigstore.dev/cosign/installation/)): ```shell $ cosign verify --key "https://packages.fluentbit.io/fluentbit-cosign.pub" fluent/fluent-bit:2.0.6 @@ -116,83 +185,101 @@ The following checks were performed on each of these signatures: [{"critical":{"identity":{"docker-reference":"index.docker.io/fluent/fluent-bit"},"image":{"docker-manifest-digest":"sha256:c740f90b07f42823d4ecf4d5e168f32ffb4b8bcd87bc41df8f5e3d14e8272903"},"type":"cosign container image signature"},"optional":{"release":"2.0.6","repo":"fluent/fluent-bit","workflow":"Release from staging"}}] ``` -Note: replace `cosign` above with the binary installed if it has a different name (e.g. `cosign-linux-amd64`). +Replace `cosign` with the binary installed if it has a different name +(for example, `cosign-linux-amd64`). -Keyless signing is also provided but this is still experimental: +Keyless signing is also provided but is still experimental: ```shell COSIGN_EXPERIMENTAL=1 cosign verify fluent/fluent-bit:2.0.6 ``` -Note: `COSIGN_EXPERIMENTAL=1` is used to allow verification of images signed in KEYLESS mode. -To learn more about keyless signing, please refer to [Keyless Signatures](https://github.com/sigstore/cosign/blob/main/KEYLESS.md#keyless-signatures). +`COSIGN_EXPERIMENTAL=1` is used to allow verification of images signed in keyless +mode. To learn more about keyless signing, see the +[Sigstore keyless signature](https://docs.sigstore.dev/cosign/signing/overview/) +documentation. -## Getting Started +## Get started -Download the last stable image from 2.0 series: +1. Download the last stable image from 2.0 series: -```shell -docker pull cr.fluentbit.io/fluent/fluent-bit:2.0 -``` + ```shell + docker pull cr.fluentbit.io/fluent/fluent-bit:2.0 + ``` -Once the image is in place, now run the following (useless) test which makes Fluent Bit measure CPU usage by the container: +1. After the image is in place, run the following test which makes Fluent Bit + measure CPU usage by the container: -```shell -docker run -ti cr.fluentbit.io/fluent/fluent-bit:2.0 \ - -i cpu -o stdout -f 1 -``` + ```shell + docker run -ti cr.fluentbit.io/fluent/fluent-bit:2.0 \ + -i cpu -o stdout -f 1 + ``` -That command will let Fluent Bit measure CPU usage every second and flush the results to the standard output, e.g: +That command lets Fluent Bit measure CPU usage every second and flushes the results +to the standard output. For example: ```shell [2019/10/01 12:29:02] [ info] [engine] started [0] cpu.0: [1504290543.000487750, {"cpu_p"=>0.750000, "user_p"=>0.250000, "system_p"=>0.500000, "cpu0.p_cpu"=>0.000000, "cpu0.p_user"=>0.000000, "cpu0.p_system"=>0.000000, "cpu1.p_cpu"=>1.000000, "cpu1.p_user"=>0.000000, "cpu1.p_system"=>1.000000, "cpu2.p_cpu"=>1.000000, "cpu2.p_user"=>1.000000, "cpu2.p_system"=>0.000000, "cpu3.p_cpu"=>0.000000, "cpu3.p_user"=>0.000000, "cpu3.p_system"=>0.000000}] ``` -## F.A.Q - -### Why there is no Fluent Bit Docker image based on Alpine Linux ? - -Alpine Linux uses Musl C library instead of Glibc. Musl is not fully compatible with Glibc which generated many issues in the following areas when used with Fluent Bit: - -* Memory Allocator: to run Fluent Bit properly in high-load environments, we use Jemalloc as a default memory allocator which reduce fragmentation and provides better performance for our needs. Jemalloc cannot run smoothly with Musl and requires extra work. 
-* Alpine Linux Musl functions bootstrap have a compatibility issue when loading Golang shared libraries, this generate problems when trying to load Golang output plugins in Fluent Bit. -* Alpine Linux Musl Time format parser does not support Glibc extensions -* Maintainers preference in terms of base image due to security and maintenance reasons are Distroless and Debian. - -### Why use distroless containers ? - -Briefly tackled in a [blog post](https://calyptia.com/2022/01/26/all-your-arch-are-belong-to-us/#security) which links out to the following possibly opposing views: - -* <https://hackernoon.com/distroless-containers-hype-or-true-value-2rfl3wat> -* <https://www.redhat.com/en/blog/why-distroless-containers-arent-security-solution-you-think-they-are> - -The reasons for using Distroless are fairly well covered here: <https://github.com/GoogleContainerTools/distroless#why-should-i-use-distroless-images> - -* Only include what you need, reduce the attack surface available. -* Reduces size so improves perfomance as well. -* Reduces false positives on scans (and reduces resources required for scanning). -* Reduces supply chain security requirements to just what you need. -* Helps prevent unauthorised processes or users interacting with the container. -* Less need to harden the container (and container runtime, K8S, etc.). -* Faster CICD processes. - -With any choice of course there are downsides: - -* No shell or package manager to update/add things. - * Generally though dynamic updating is a bad idea in containers as the time it is done affects the outcome: two containers started at different times using the same base image may perform differently or get different dependencies, etc. - * A better approach is to rebuild a new image version but then you can do this with Distroless, however it is harder requiring multistage builds or similar to provide the new dependencies. -* Debugging can be harder. - * More specifically you need applications set up to properly expose information for debugging rather than rely on traditional debug approaches of connecting to processes or dumping memory. This can be an upfront cost vs a runtime cost but does shift left in the development process so hopefully is a reduction overall. -* Assumption that Distroless is secure: nothing is secure (just more or less secure) and there are still exploits so it does not remove the need for securing your system. -* Sometimes you need to use a common base image, e.g. with audit/security/health/etc. hooks integrated, or common base tooling (this could still be Distroless though). - -One other important thing to note is that `exec`'ing into a container will potentially impact resource limits. - -For debugging, debug containers are available now in K8S: <https://kubernetes.io/docs/tasks/debug/debug-application/debug-running-pod/#ephemeral-container> - -* This can be a quite different container from the one you want to investigate (e.g. lots of extra tools or even a different base). -* No resource limits applied to this container - can be good or bad. -* Runs in pod namespaces, just another container that can access everything the others can. -* May need architecture of the pod to share volumes, etc. -* Requires more recent versions of K8S and the container runtime plus RBAC allowing it. +## FAQ + +### Why there is no Fluent Bit Docker image based on Alpine Linux? + +Alpine Linux uses Musl C library instead of Glibc. 
Musl isn't fully compatible with
+Glibc, which generated many issues in the following areas when used with Fluent Bit:
+
+- Memory allocator: To run properly in high-load environments, Fluent Bit uses
+  Jemalloc as its default memory allocator, which reduces fragmentation and provides
+  better performance. Jemalloc can't run smoothly with Musl and requires extra work.
+- Alpine Linux Musl function bootstrapping has a compatibility issue when loading
+  Golang shared libraries. This causes problems when trying to load Golang output
+  plugins in Fluent Bit.
+- The Alpine Linux Musl time format parser doesn't support Glibc extensions.
+- The Fluent Bit maintainers' preferred base images are Distroless and Debian,
+  for security and maintenance reasons.
+
+### Why use Distroless containers?
+
+The reasons for using Distroless are well covered in
+[Why should I use Distroless images?](https://github.com/GoogleContainerTools/distroless#why-should-i-use-distroless-images).
+
+- Includes only what you need, reducing the available attack surface.
+- Reduces size and improves performance.
+- Reduces false positives on scans (and reduces resources required for scanning).
+- Reduces supply chain security requirements to only what you need.
+- Helps prevent unauthorised processes or users interacting with the container.
+- Less need to harden the container (and container runtime, K8s, and so on).
+- Faster CI/CD processes.
+
+With any choice, there are downsides:
+
+- No shell or package manager to update or add things.
+  - Generally, dynamic updating is a bad idea in containers as the time it's done
+    affects the outcome: two containers started at different times using the same
+    base image can perform differently or get different dependencies.
+  - A better approach is to rebuild a new image version. You can do this with
+    Distroless, but it's harder and requires multistage builds or similar to provide
+    the new dependencies.
+- Debugging can be harder.
+  - More specifically, you need applications set up to properly expose information for
+    debugging rather than rely on traditional debug approaches of connecting to
+    processes or dumping memory. This can be an upfront cost versus a runtime cost,
+    but it shifts left in the development process, so it's hopefully a reduction overall.
+- Assumption that Distroless is secure: nothing is secure, and there are still
+  exploits, so it doesn't remove the need for securing your system.
+- Sometimes you need to use a common base image, such as one with audit, security, or
+  health hooks integrated.
+
+Using `exec` to access a container will potentially impact resource limits.
+
+For debugging, ephemeral debug containers are available in Kubernetes (see
+<https://kubernetes.io/docs/tasks/debug/debug-application/debug-running-pod/#ephemeral-container>
+and the example sketched after this list):
+
+- This can be a significantly different container from the one you want to
+  investigate, with lots of extra tools or even a different base.
+- No resource limits applied to this container, which can be good or bad.
+- Runs in pod namespaces. It's another container that can access everything the
+  others can.
+- Might require the pod to be architected to share volumes or other information.
+- Requires more recent versions of Kubernetes and the container runtime, plus RBAC
+  that allows it.
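+
+As an illustration only (not part of the Fluent Bit project), the following sketch
+shows how an ephemeral debug container might be attached to a running Fluent Bit pod.
+The pod name `fluent-bit-abc123`, the namespace `logging`, and the container name
+`fluent-bit` are assumptions; substitute the values from your own cluster.
+
+```shell
+# Attach an ephemeral BusyBox container to a running Fluent Bit pod, sharing the
+# process namespace of the target container so its processes are visible.
+kubectl debug -it fluent-bit-abc123 \
+  --namespace logging \
+  --image=busybox:1.36 \
+  --target=fluent-bit
+
+# Then, inside the interactive shell of the debug container, the Fluent Bit
+# process should be visible:
+ps
+```
+
+Because the production images are Distroless, an ephemeral container like this is
+usually easier than rebuilding a custom image just to get a shell, though it's still
+subject to the version and RBAC requirements listed above.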
diff --git a/installation/getting-started-with-fluent-bit.md b/installation/getting-started-with-fluent-bit.md index 6bdc4e2c9..2f50d20f1 100644 --- a/installation/getting-started-with-fluent-bit.md +++ b/installation/getting-started-with-fluent-bit.md @@ -1,44 +1,46 @@ --- -description: The following serves as a guide on how to install/deploy/upgrade Fluent Bit +description: A guide on how to install, deploy, and upgrade Fluent Bit --- +# Get started with Fluent Bit -# Getting Started with Fluent Bit +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=e9732f9c-44a4-46d3-ab87-86138455c698" /> -## Container Deployment +## Container deployment -| Deployment Type | Instructions | +| Deployment Type | instructions | | ----------------- | -------------------------------------------------- | | Kubernetes | [Deploy on Kubernetes](kubernetes.md#installation) | | Docker | [Deploy with Docker](docker.md) | | Containers on AWS | [Deploy on Containers on AWS](aws-container.md) | -## Install on Linux (Packages) +## Install on Linux (packages) -| Operating System | Installation Instructions | -| ---------------------- | -------------------------- | +| Operating System | Installation instructions | +| ---------------------- | ------------------------- | | CentOS / Red Hat | [CentOS 7](linux/redhat-centos.md#install-on-redhat-centos), [CentOS 8](linux/redhat-centos.md#install-on-redhat-centos), [CentOS 9 Stream](linux/redhat-centos.md#install-on-redhat-centos) | | Ubuntu | [Ubuntu 16.04 LTS](linux/ubuntu.md), [Ubuntu 18.04 LTS](linux/ubuntu.md), [Ubuntu 20.04 LTS](linux/ubuntu.md), [Ubuntu 22.04 LTS](linux/ubuntu.md) | | Debian | [Debian 10](linux/debian.md), [Debian 11](linux/debian.md), [Debian 12](linux/debian.md) | | Amazon Linux | [Amazon Linux 2](linux/amazon-linux.md#install-on-amazon-linux-2), [Amazon Linux 2022](linux/amazon-linux.md#amazon-linux-2022) | | Raspbian / Raspberry Pi | [Raspbian 10](linux/raspbian-raspberry-pi.md#raspbian-10-buster), [Raspbian 11](linux/raspbian-raspberry-pi.md#raspbian-11-bullseye) | | Yocto / Embedded Linux | [Yocto / Embedded Linux](yocto-embedded-linux.md#fluent-bit-and-other-architectures) | +| Buildroot / Embedded Linux | [Buildroot / Embedded Linux](buildroot-embedded-linux.md) | -## Install on Windows (Packages) +## Install on Windows (packages) -| Operating System | Installation Instructions | -| ------------------- | ---------------------------------------------------------------------------------------------------------------------------- | +| Operating System | Installation instructions | +| ------------------- | ------------------------- | | Windows Server 2019 | [Windows Server EXE](windows.md#installing-from-exe-installer), [Windows Server ZIP](windows.md#installing-from-zip-archive) | | Windows 10 2019.03 | [Windows EXE](windows.md#installing-from-exe-installer), [Windows ZIP](windows.md#installing-from-zip-archive) | -## Install on macOS (Packages) +## Install on macOS (packages) -| Operating System | Installation Instructions | +| Operating System | Installation instructions | | ------------------- | --------------------------------------------- | | macOS | [Homebrew](macos.md#installing-from-homebrew) | ## Compile from Source (Linux, Windows, FreeBSD, macOS) -| Operating System | Installation Instructions | +| Operating system | Installation instructions | | ---------------- | ----------------------------------------------------------- | | Linux, FreeBSD | [Compile from source](sources/build-and-install.md) | 
| macOS | [Compile from source](macos.md#compile-from-source) |
@@ -46,7 +48,7 @@ description: The following serves as a guide on how to install/deploy/upgrade Fl

## Sandbox Environment

-If you are interested in learning about Fluent Bit you can try out the sandbox environment
+If you are interested in learning about Fluent Bit, you can try out the sandbox environment:

{% embed url="https://play.instruqt.com/embed/Fluent/tracks/fluent-bit-getting-started-101?token=em_S2zOzhhDQepM0vDS" %}
Fluent Bit Sandbox Environment
@@ -54,7 +56,6 @@ Fluent Bit Sandbox Environment

## Enterprise Packages

-Fluent Bit packages are also provided by [enterprise providers](https://fluentbit.io/enterprise) for older end of life versions, Unix systems, and additional support and features including aspects like CVE backporting.
-A list provided by fluentbit.io/enterprise is provided below
-
-* [Calyptia Fluent Bit LTS](https://calyptia.com/products/lts-fluentbit/)
+Fluent Bit packages are also provided by [enterprise
+providers](https://fluentbit.io/enterprise) for older end-of-life versions, Unix
+systems, and additional support and features, including aspects like CVE backporting.
diff --git a/installation/kubernetes.md b/installation/kubernetes.md
index da44eae16..025ba8c23 100644
--- a/installation/kubernetes.md
+++ b/installation/kubernetes.md
@@ -6,124 +6,117 @@ description: Kubernetes Production Grade Log Processor
.png>)

-[Fluent Bit](http://fluentbit.io) is a lightweight and extensible **Log Processor** that comes with full support for Kubernetes:
+[Fluent Bit](http://fluentbit.io) is a lightweight and extensible log processor
+with full support for Kubernetes:

-* Process Kubernetes containers logs from the file system or Systemd/Journald.
-* Enrich logs with Kubernetes Metadata.
-* Centralize your logs in third party storage services like Elasticsearch, InfluxDB, HTTP, etc.
+- Process Kubernetes container logs from the file system or Systemd/Journald.
+- Enrich logs with Kubernetes metadata.
+- Centralize your logs in third-party storage services like Elasticsearch, InfluxDB,
+  HTTP, and so on.

-## Concepts <a href="#concepts" id="concepts"></a>
+## Concepts

-Before getting started it is important to understand how Fluent Bit will be deployed. Kubernetes manages a cluster of _nodes_, so our log agent tool will need to run on every node to collect logs from every _POD_, hence Fluent Bit is deployed as a DaemonSet (a POD that runs on every _node_ of the cluster).
+Before getting started, it's important to understand how Fluent Bit will be deployed.
+Kubernetes manages a cluster of nodes. The Fluent Bit log agent tool needs to run
+on every node to collect logs from every pod. Fluent Bit is deployed as a
+DaemonSet, which is a pod that runs on every node of the cluster.

-When Fluent Bit runs, it will read, parse and filter the logs of every POD and will enrich each entry with the following information (metadata):
+When Fluent Bit runs, it reads, parses, and filters the logs of every pod. In
+addition, Fluent Bit adds metadata to each entry using the
+[Kubernetes](../pipeline/filters/kubernetes) filter plugin.

-* Pod Name
-* Pod ID
-* Container Name
-* Container ID
-* Labels
-* Annotations
+The Kubernetes filter plugin talks to the Kubernetes API Server to retrieve relevant
+information such as the `pod_id`, `labels`, and `annotations`. Other fields, such as
+`pod_name`, `container_id`, and `container_name`, are retrieved locally from the log
+file names. 
All of this is handled automatically, and no intervention is required from a +configuration aspect. -To obtain this information, a built-in filter plugin called _kubernetes_ talks to the Kubernetes API Server to retrieve relevant information such as the _pod\_id_, _labels_ and _annotations_, other fields such as _pod\_name_, _container\_id_ and _container\_name_ are retrieved locally from the log file names. All of this is handled automatically, no intervention is required from a configuration aspect. +## Installation -> Our Kubernetes Filter plugin is fully inspired by the [Fluentd Kubernetes Metadata Filter](https://github.com/fabric8io/fluent-plugin-kubernetes\_metadata\_filter) written by [Jimmi Dyson](https://github.com/jimmidyson). +[Fluent Bit](http://fluentbit.io) should be deployed as a DaemonSet, so it will +be available on every node of your Kubernetes cluster. -## Installation <a href="#installation" id="installation"></a> - -[Fluent Bit](http://fluentbit.io) should be deployed as a DaemonSet, so on that way it will be available on every node of your Kubernetes cluster. - -The recommended way to deploy Fluent Bit is with the official Helm Chart: https://github.com/fluent/helm-charts +The recommended way to deploy Fluent Bit for Kubernetes is with the official Helm +Chart at <https://github.com/fluent/helm-charts>. ### Note for OpenShift -If you are using Red Hat OpenShift you will also need to set up security context constraints (SCC): - -``` -$ kubectl create -f https://raw.githubusercontent.com/fluent/fluent-bit-kubernetes-logging/master/fluent-bit-openshift-security-context-constraints.yaml -``` +If you are using Red Hat OpenShift you must set up Security Context Constraints (SCC) +using the relevant option in the helm chart. ### Installing with Helm Chart -[Helm](https://helm.sh) is a package manager for Kubernetes and allows you to quickly deploy application packages into your running cluster. Fluent Bit is distributed via a helm chart found in the Fluent Helm Charts repo: [https://github.com/fluent/helm-charts](https://github.com/fluent/helm-charts). +[Helm](https://helm.sh) is a package manager for Kubernetes and lets you deploy +application packages into your running cluster. Fluent Bit is distributed using a Helm +chart found in the [Fluent Helm Charts repository](https://github.com/fluent/helm-charts). -To add the Fluent Helm Charts repo use the following command +Use the following command to add the Fluent Helm charts repository -``` +```shell helm repo add fluent https://fluent.github.io/helm-charts ``` -To validate that the repo was added you can run `helm search repo fluent` to ensure the charts were added. The default chart can then be installed by running the following +To validate that the repository was added, run `helm search repo fluent` to +ensure the charts were added. The default chart can then be installed by running the +following command: -``` +```shell helm upgrade --install fluent-bit fluent/fluent-bit ``` ### Default Values -The default chart values include configuration to read container logs, with Docker parsing, systemd logs apply Kubernetes metadata enrichment and finally output to an Elasticsearch cluster. You can modify the values file included [https://github.com/fluent/helm-charts/blob/master/charts/fluent-bit/values.yaml](https://github.com/fluent/helm-charts/blob/master/charts/fluent-bit/values.yaml) to specify additional outputs, health checks, monitoring endpoints, or other configuration options. 
+The default chart values include configuration to read container logs (with Docker
+parsing) and systemd logs, apply Kubernetes metadata enrichment, and output to an
+Elasticsearch cluster. You can modify the
+[included values file](https://github.com/fluent/helm-charts/blob/master/charts/fluent-bit/values.yaml)
+to specify additional outputs, health checks, monitoring endpoints, or other
+configuration options.

## Details

-The default configuration of Fluent Bit makes sure of the following:
+The default configuration of Fluent Bit ensures the following:

-* Consume all containers logs from the running Node.
-* The [Tail input plugin](https://docs.fluentbit.io/manual/v/1.0/input/tail) will not append more than **5MB** into the engine until they are flushed to the Elasticsearch backend. This limit aims to provide a workaround for [backpressure](https://docs.fluentbit.io/manual/v/1.0/configuration/backpressure) scenarios.
-* The Kubernetes filter will enrich the logs with Kubernetes metadata, specifically _labels_ and _annotations_. The filter only goes to the API Server when it cannot find the cached info, otherwise it uses the cache.
-* The default backend in the configuration is Elasticsearch set by the [Elasticsearch Output Plugin](../pipeline/outputs/elasticsearch.md). It uses the Logstash format to ingest the logs. If you need a different Index and Type, please refer to the plugin option and do your own adjustments.
-* There is an option called **Retry\_Limit** set to False, that means if Fluent Bit cannot flush the records to Elasticsearch it will re-try indefinitely until it succeed.
+- Consume all container logs from the running node and parse them with either
+  the `docker` or `cri` multi-line parser.
+- Persist how far it got into each file it's tailing, so if a pod is restarted it
+  picks up from where it left off.
+- The Kubernetes filter adds Kubernetes metadata, specifically `labels` and
+  `annotations`. The filter only contacts the API Server when it can't find the
+  cached information; otherwise, it uses the cache.
+- The default backend in the configuration is Elasticsearch, set by the
+  [Elasticsearch Output Plugin](../pipeline/outputs/elasticsearch.md).
+  It uses the Logstash format to ingest the logs. If you need a different `Index`
+  and `Type`, refer to the plugin option and update as needed.
+- There is an option called `Retry_Limit`, which is set to `False`. If Fluent Bit
+  can't flush the records to Elasticsearch, it will retry indefinitely until it
+  succeeds.

-## Container Runtime Interface (CRI) parser
+## Windows deployment

-Fluent Bit by default assumes that logs are formatted by the Docker interface standard. However, when using CRI you can run into issues with malformed JSON if you do not modify the parser used. Fluent Bit includes a CRI log parser that can be used instead. An example of the parser is seen below:
-
-```
-# CRI Parser
-[PARSER]
-    # http://rubular.com/r/tjUt3Awgg4
-    Name cri
-    Format regex
-    Regex ^(?<time>[^ ]+) (?<stream>stdout|stderr) (?<logtag>[^ ]*) (?<message>.*)$
-    Time_Key time
-    Time_Format %Y-%m-%dT%H:%M:%S.%L%z
-```
-
-To use this parser change the Input section for your configuration from `docker` to `cri`
-
-```
-[INPUT]
-    Name tail
-    Path /var/log/containers/*.log
-    Parser cri
-    Tag kube.*
-    Mem_Buf_Limit 5MB
-    Skip_Long_Lines On
-```
-
-## Windows Deployment
-
-Since v1.5.0, Fluent Bit supports deployment to Windows pods.
+Fluent Bit v1.5.0 and later supports deployment to Windows pods. 
### Log files overview

When deploying Fluent Bit to Kubernetes, there are three log files that you need to pay attention to.

-`C:\k\kubelet.err.log`
+- `C:\k\kubelet.err.log`

-* This is the error log file from kubelet daemon running on host.
-* You will need to retain this file for future troubleshooting (to debug deployment failures etc.)
+  This is the error log file from the kubelet daemon running on the host. Retain this
+  file for future troubleshooting, including debugging deployment failures.

-`C:\var\log\containers\<pod>_<namespace>_<container>-<docker>.log`
+- `C:\var\log\containers\<pod>_<namespace>_<container>-<docker>.log`

-* This is the main log file you need to watch. Configure Fluent Bit to follow this file.
-* It is actually a symlink to the Docker log file in `C:\ProgramData\`, with some additional metadata on its file name.
+  This is the main log file you need to watch. Configure Fluent Bit to follow this
+  file. It's a symlink to the Docker log file in `C:\ProgramData\`, with some
+  additional metadata in its file name.

-`C:\ProgramData\Docker\containers\<docker>\<docker>.log`
+- `C:\ProgramData\Docker\containers\<docker>\<docker>.log`

-* This is the log file produced by Docker.
-* Normally you don't directly read from this file, but you need to make sure that this file is visible from Fluent Bit.
+  This is the log file produced by Docker. Normally you don't directly read from this
+  file, but you need to make sure that this file is visible to Fluent Bit.

-Typically, your deployment yaml contains the following volume configuration.
+Typically, your deployment YAML contains the following volume configuration.

```yaml
spec:
@@ -151,7 +144,8 @@ spec:

### Configure Fluent Bit

-Assuming the basic volume configuration described above, you can apply the following config to start logging. You can visualize this configuration [here (Sign-up required)](https://calyptia.com/free-trial)
+Assuming the basic volume configuration described previously, you can apply the
+following configuration to start logging:

```yaml
fluent-bit.conf: |
@@ -193,14 +187,18 @@ parsers.conf: |

### Mitigate unstable network on Windows pods

-Windows pods often lack working DNS immediately after boot ([#78479](https://github.com/kubernetes/kubernetes/issues/78479)). To mitigate this issue, `filter_kubernetes` provides a built-in mechanism to wait until the network starts up:
+Windows pods often lack working DNS immediately after boot
+([#78479](https://github.com/kubernetes/kubernetes/issues/78479)). To mitigate this
+issue, `filter_kubernetes` provides a built-in mechanism to wait until the network
+starts up:

-* `DNS_Retries` - Retries N times until the network start working (6)
-* `DNS_Wait_Time` - Lookup interval between network status checks (30)
+- `DNS_Retries`: Retries N times until the network starts working (default: 6)
+- `DNS_Wait_Time`: Lookup interval between network status checks (default: 30)

-By default, Fluent Bit waits for 3 minutes (30 seconds x 6 times). If it's not enough for you, tweak the configuration as follows.
+By default, Fluent Bit waits for three minutes (30 seconds x 6 times). If that's not
+enough for you, update the configuration as follows:

-```
+```text
[filter]
    Name kubernetes
    ... 
diff --git a/installation/linux/README.md b/installation/linux/README.md index cb006ff0a..700424251 100644 --- a/installation/linux/README.md +++ b/installation/linux/README.md @@ -1,19 +1,22 @@ -# Linux Packages +# Linux packages -The most secure option is to create the repositories acccording to the instructions for your specific OS. +The most secure option is to create the repositories according to the instructions +for your specific OS. -A simple installation script is provided to be used for most Linux targets. +An installation script is provided for use with most Linux targets. This will by default install the most recent version released. ```bash curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh ``` -This is purely a convenience helper and should always be validated prior to use. +This is a helper and should always be validated prior to use. ## GPG key updates -From the 1.9.0 and 1.8.15 releases please note that the GPG key has been updated at [https://packages.fluentbit.io/fluentbit.key](https://packages.fluentbit.io/fluentbit.key) so ensure this new one is added. +For the 1.9.0 and 1.8.15 releases and later, the GPG key +[has been updated](https://packages.fluentbit.io/fluentbit.key). Ensure the new +key is added. The GPG Key fingerprint of the new key is: @@ -22,7 +25,8 @@ C3C0 A285 34B9 293E AF51 FABD 9F9D DC08 3888 C1CD Fluentbit releases (Releases signing key) <releases@fluentbit.io> ``` -The previous key is still available at [https://packages.fluentbit.io/fluentbit-legacy.key](https://packages.fluentbit.io/fluentbit-legacy.key) and may be required to install previous versions. +The previous key is [still available](https://packages.fluentbit.io/fluentbit-legacy.key) +and might be required to install previous versions. The GPG Key fingerprint of the old key is: @@ -30,9 +34,10 @@ The GPG Key fingerprint of the old key is: F209 D876 2A60 CD49 E680 633B 4FF8 368B 6EA0 722A ``` -Refer to the [supported platform documentation](./../supported-platforms.md) to see which platforms are supported in each release. +Refer to the [supported platform documentation](./../supported-platforms.md) to see +which platforms are supported in each release. ## Migration to Fluent Bit -From version 1.9, `td-agent-bit` is a deprecated package and is removed after 1.9.9. -The correct package name to use now is `fluent-bit`. +For version 1.9 and later, `td-agent-bit` is a deprecated package and is removed +after 1.9.9. The correct package name to use now is `fluent-bit`. diff --git a/installation/linux/amazon-linux.md b/installation/linux/amazon-linux.md index 186763bd8..64d4519bc 100644 --- a/installation/linux/amazon-linux.md +++ b/installation/linux/amazon-linux.md @@ -2,35 +2,35 @@ ## Install on Amazon Linux -Fluent Bit is distributed as **fluent-bit** package and is available for the latest Amazon Linux 2 and Amazon Linux 2023. -The following architectures are supported +Fluent Bit is distributed as the `fluent-bit` package and is available for the latest +Amazon Linux 2 and Amazon Linux 2023. The following architectures are supported -* x86\_64 -* aarch64 / arm64v8 +- x86_64 +- aarch64 / arm64v8 + +Amazon Linux 2022 is no longer supported. ## Single line install -A simple installation script is provided to be used for most Linux targets. -This will always install the most recent version released. +Fluent Bit provides an installation script to use for most Linux targets. +This will always install the most recently released version. 
-```bash
+```bash copy
curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh
```

-This is purely a convenience helper and should always be validated prior to use.
-The recommended secure deployment approach is to follow the instructions below.
-
-### Amazon Linux 2022
-
-Amazon Linux 2022 was previously supported but is removed since it became GA Amazon Linux 2023
+This is a convenience helper and should always be validated prior to use.
+The recommended secure deployment approach is to use the following instructions:

## Configure Yum

-We provide **fluent-bit** through a Yum repository. In order to add the repository reference to your system, please add a new file called _fluent-bit.repo_ in _/etc/yum.repos.d/_ with the following content:
+The `fluent-bit` package is provided through a Yum repository. To add the repository
+reference to your system, add a new file called `fluent-bit.repo` in
+`/etc/yum.repos.d/` with the following content:

### Amazon Linux 2

-```config
+```text copy
[fluent-bit]
name = Fluent Bit
baseurl = https://packages.fluentbit.io/amazonlinux/2/
@@ -41,7 +41,7 @@ enabled=1

### Amazon Linux 2023

-```config
+```text copy
[fluent-bit]
name = Fluent Bit
baseurl = https://packages.fluentbit.io/amazonlinux/2023/
gpgkey=https://packages.fluentbit.io/fluentbit.key
enabled=1
```

-Note: we encourage you always enable the _gpgcheck_ for security reasons. All our packages are signed.
+You should always enable `gpgcheck` for security reasons. All Fluent Bit packages
+are signed.

### Updated key from March 2022

-From the 1.9.0 and 1.8.15 releases please note that the GPG key has been updated at [https://packages.fluentbit.io/fluentbit.key](https://packages.fluentbit.io/fluentbit.key) so ensure this new one is added.
+For releases 1.9.0 and 1.8.15 and later, the
+[GPG key has been updated](https://packages.fluentbit.io/fluentbit.key). Ensure
+the new key is added.

The GPG Key fingerprint of the new key is:

@@ -63,7 +66,8 @@ C3C0 A285 34B9 293E AF51 FABD 9F9D DC08 3888 C1CD
Fluentbit releases (Releases signing key) <releases@fluentbit.io>
```

-The previous key is still available at [https://packages.fluentbit.io/fluentbit-legacy.key](https://packages.fluentbit.io/fluentbit-legacy.key) and may be required to install previous versions.
+The previous key is [still available](https://packages.fluentbit.io/fluentbit-legacy.key)
+and might be required to install previous versions.

The GPG Key fingerprint of the old key is:

@@ -71,19 +75,20 @@ F209 D876 2A60 CD49 E680 633B 4FF8 368B 6EA0 722A
```

-Refer to the [supported platform documentation](../supported-platforms.md) to see which platforms are supported in each release.
+Refer to the [supported platform documentation](../supported-platforms.md) to see
+which platforms are supported in each release.

### Install

-Once your repository is configured, run the following command to install it:
+1. After your repository is configured, run the following command to install Fluent Bit:

-```bash
-sudo yum install fluent-bit
-```
+   ```bash copy
+   sudo yum install fluent-bit
+   ```

-Now the following step is to instruct _systemd_ to enable the service:
+1. Instruct `systemd` to enable the service:

-```bash
+```bash copy
sudo systemctl start fluent-bit
```

@@ -100,4 +105,6 @@ $ systemctl status fluent-bit
... 
```

-The default configuration of **fluent-bit** is collecting metrics of CPU usage and sending the records to the standard output, you can see the outgoing data in your _/var/log/messages_ file.
+The default Fluent Bit configuration collects CPU usage metrics and sends the
+records to the standard output. You can see the outgoing data in your
+`/var/log/messages` file.
diff --git a/installation/linux/debian.md b/installation/linux/debian.md
index a3246f4d8..d6e9e22f2 100644
--- a/installation/linux/debian.md
+++ b/installation/linux/debian.md
@@ -1,35 +1,43 @@
# Debian

-Fluent Bit is distributed as **fluent-bit** package and is available for the latest (and legacy) stable Debian systems: Bookworm and Bullseye.
+Fluent Bit is distributed as the `fluent-bit` package and is available for the latest
+and legacy stable Debian systems: Bookworm and Bullseye.
+
The following architectures are supported

-* x86\_64
-* aarch64 / arm64v8
+- `x86_64`
+- `aarch64` / `arm64v8`

## Single line install

-A simple installation script is provided to be used for most Linux targets.
-This will always install the most recent version released.
+Fluent Bit provides an installation script to use for most Linux targets.
+This will always install the most recently released version.

-```bash
+```bash copy
curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh
```

-This is purely a convenience helper and should always be validated prior to use.
-The recommended secure deployment approach is to follow the instructions below.
+This is a convenience helper and should always be validated prior to use.
+The recommended secure deployment approach is to use the following instructions:

## Server GPG key

-The first step is to add our server GPG key to your keyring, on that way you can get our signed packages.
-Follow the official Debian wiki guidance: <https://wiki.debian.org/DebianRepository/UseThirdParty#OpenPGP\_Key\_distribution>
+The first step is to add the Fluent Bit server GPG key to your keyring to ensure
+you can get the correct signed packages.

-```bash
+Follow the official
+[Debian wiki guidance](https://wiki.debian.org/DebianRepository/UseThirdParty#OpenPGP_Key_distribution).
+
+```bash copy
curl https://packages.fluentbit.io/fluentbit.key | gpg --dearmor > /usr/share/keyrings/fluentbit-keyring.gpg
```

### Updated key from March 2022

-From the 1.9.0 and 1.8.15 releases please note that the GPG key has been updated at [https://packages.fluentbit.io/fluentbit.key](https://packages.fluentbit.io/fluentbit.key) so ensure this new one is added.
+For releases 1.9.0 and 1.8.15 and later, the
+[GPG key has been updated](https://packages.fluentbit.io/fluentbit.key). Ensure
+the new key is added.

The GPG Key fingerprint of the new key is:

@@ -38,7 +46,8 @@ C3C0 A285 34B9 293E AF51 FABD 9F9D DC08 3888 C1CD
Fluentbit releases (Releases signing key) <releases@fluentbit.io>
```

-The previous key is still available at [https://packages.fluentbit.io/fluentbit-legacy.key](https://packages.fluentbit.io/fluentbit-legacy.key) and may be required to install previous versions.
+The previous key is [still available](https://packages.fluentbit.io/fluentbit-legacy.key)
+and might be required to install previous versions.

The GPG Key fingerprint of the old key is:

@@ -46,43 +55,50 @@ F209 D876 2A60 CD49 E680 633B 4FF8 368B 6EA0 722A
```

-Refer to the [supported platform documentation](../supported-platforms.md) to see which platforms are supported in each release. 
+Refer to the [supported platform documentation](../supported-platforms.md) to see
+which platforms are supported in each release.

## Update your sources lists

-On Debian, you need to add our APT server entry to your sources lists, please add the following content at bottom of your **/etc/apt/sources.list** file - ensure to set `CODENAME` to your specific [Debian release name](https://wiki.debian.org/DebianReleases#Production\_Releases) (e.g. `bookworm` for Debian 12):
+For Debian, you must add the Fluent Bit APT server entry to your sources lists.
+Add the following content at the bottom of your `/etc/apt/sources.list` file:

-```bash
+```bash copy
deb [signed-by=/usr/share/keyrings/fluentbit-keyring.gpg] https://packages.fluentbit.io/debian/${CODENAME} ${CODENAME} main
```

+Replace _`CODENAME`_ with your specific
+[Debian release name](https://wiki.debian.org/DebianReleases#Production\_Releases)
+(for example, `bookworm` for Debian 12).
+
## Update your repositories database

-Now let your system update the _apt_ database:
+Update your system's `apt` database:

-```bash
+```bash copy
sudo apt-get update
```

{% hint style="info" %}
-We recommend upgrading your system (`sudo apt-get upgrade`). This could avoid potential issues with expired certificates.
+Fluent Bit recommends upgrading your system (`sudo apt-get upgrade`). This could
+avoid potential issues with expired certificates.
{% endhint %}

## Install Fluent Bit

-Using the following _apt-get_ command you are able now to install the latest _fluent-bit_:
+1. Use the following `apt-get` command to install the latest Fluent Bit:

-```bash
-sudo apt-get install fluent-bit
-```
+   ```bash copy
+   sudo apt-get install fluent-bit
+   ```

-Now the following step is to instruct _systemd_ to enable the service:
+1. Instruct `systemd` to enable the service:

-```bash
-sudo systemctl start fluent-bit
-```
+   ```bash copy
+   sudo systemctl start fluent-bit
+   ```

-If you do a status check, you should see a similar output like this:
+If you do a status check, you should see output similar to the following:

```bash
sudo service fluent-bit status
@@ -98,4 +114,6 @@ sudo service fluent-bit status
...
```

-The default configuration of **fluent-bit** is collecting metrics of CPU usage and sending the records to the standard output, you can see the outgoing data in your _/var/log/syslog_ file.
+The default Fluent Bit configuration collects CPU usage metrics and sends the
+records to the standard output. You can see the outgoing data in your
+`/var/log/syslog` file.
diff --git a/installation/linux/raspbian-raspberry-pi.md b/installation/linux/raspbian-raspberry-pi.md
index cc0889f18..cd498bd71 100644
--- a/installation/linux/raspbian-raspberry-pi.md
+++ b/installation/linux/raspbian-raspberry-pi.md
@@ -1,21 +1,26 @@
-# Raspbian / Raspberry Pi
+# Raspbian and Raspberry Pi

-Fluent Bit is distributed as **fluent-bit** package and is available for the Raspberry, specifically for [Raspbian](http://raspbian.org) distribution, the following versions are supported:
+Fluent Bit is distributed as the `fluent-bit` package and is available for the
+Raspberry Pi, specifically for the [Raspbian](http://raspbian.org) distribution. 
The
+following versions are supported:

-* Raspbian Bullseye \(11\)
-* Raspbian Buster \(10\)
+* Raspbian Bullseye (11)
+* Raspbian Buster (10)

## Server GPG key

-The first step is to add our server GPG key to your keyring, on that way you can get our signed packages:
+The first step is to add the Fluent Bit server GPG key to your keyring so you
+can get Fluent Bit signed packages:

-```text
+```shell
curl https://packages.fluentbit.io/fluentbit.key | sudo apt-key add -
```

### Updated key from March 2022

-From the 1.9.0 and 1.8.15 releases please note that the GPG key has been updated at [https://packages.fluentbit.io/fluentbit.key](https://packages.fluentbit.io/fluentbit.key) so ensure this new one is added.
+For releases 1.9.0 and 1.8.15 and later, the
+[GPG key has been updated](https://packages.fluentbit.io/fluentbit.key). Ensure
+the new key is added.

The GPG Key fingerprint of the new key is:

@@ -24,7 +29,8 @@ C3C0 A285 34B9 293E AF51 FABD 9F9D DC08 3888 C1CD
Fluentbit releases (Releases signing key) <releases@fluentbit.io>
```

-The previous key is still available at [https://packages.fluentbit.io/fluentbit-legacy.key](https://packages.fluentbit.io/fluentbit-legacy.key) and may be required to install previous versions.
+The previous key is [still available](https://packages.fluentbit.io/fluentbit-legacy.key)
+and might be required to install previous versions.

The GPG Key fingerprint of the old key is:

@@ -32,19 +38,23 @@ F209 D876 2A60 CD49 E680 633B 4FF8 368B 6EA0 722A
```

-Refer to the [supported platform documentation](./../supported-platforms.md) to see which platforms are supported in each release.
+Refer to the [supported platform documentation](./../supported-platforms.md) to see
+which platforms are supported in each release.

## Update your sources lists

-On Debian and derivative systems such as Raspbian, you need to add our APT server entry to your sources lists, please add the following content at bottom of your **/etc/apt/sources.list** file.
+On Debian and derivative systems such as Raspbian, you need to add the Fluent Bit
+APT server entry to your sources lists.
+
+Add the following content at the bottom of your `/etc/apt/sources.list` file.

-### Raspbian 11 \(Bullseye\)
+### Raspbian 11 (Bullseye)

```text
deb https://packages.fluentbit.io/raspbian/bullseye bullseye main
```

-### Raspbian 10 \(Buster\)
+### Raspbian 10 (Buster)

```text
deb https://packages.fluentbit.io/raspbian/buster buster main
@@ -52,29 +62,30 @@ deb https://packages.fluentbit.io/raspbian/buster buster main

### Update your repositories database

-Now let your system update the _apt_ database:
+Now let your system update the `apt` database:

```bash
sudo apt-get update
```

{% hint style="info" %}
-We recommend upgrading your system (```sudo apt-get upgrade```). This could avoid potential issues with expired certificates.
+Fluent Bit recommends upgrading your system (`sudo apt-get upgrade`) to avoid
+potential issues with expired certificates.
{% endhint %}

## Install Fluent Bit

-Using the following _apt-get_ command you are able now to install the latest _fluent-bit_:
+1. Use the following `apt-get` command to install the latest Fluent Bit:

-```text
-sudo apt-get install fluent-bit
-```
+   ```shell
+   sudo apt-get install fluent-bit
+   ```

-Now the following step is to instruct _systemd_ to enable the service:
+1. 
Instruct `systemd` to enable the service: -```bash -sudo service fluent-bit start -``` + ```bash + sudo service fluent-bit start + ``` If you do a status check, you should see a similar output like this: @@ -92,4 +103,6 @@ sudo service fluent-bit status ... ``` -The default configuration of **fluent-bit** is collecting metrics of CPU usage and sending the records to the standard output, you can see the outgoing data in your _/var/log/syslog_ file. +The default configuration of Fluent Bit collects metrics for CPU usage and +sends the records to the standard output. You can see the outgoing data in your +`/var/log/syslog` file. diff --git a/installation/linux/redhat-centos.md b/installation/linux/redhat-centos.md index 277a26f8a..c4c5fc5b6 100644 --- a/installation/linux/redhat-centos.md +++ b/installation/linux/redhat-centos.md @@ -1,60 +1,68 @@ -# Redhat / CentOS +# Red Hat and CentOS -## Install on Redhat / CentOS +Fluent Bit is distributed as the `fluent-bit` package and is available for the latest +stable CentOS system. -Fluent Bit is distributed as **fluent-bit** package and is available for the latest stable CentOS system. +Fluent Bit supports the following architectures: -The following architectures are supported +- `x86_64` +- `aarch64` +- `arm64v8` -* x86\_64 -* aarch64 / arm64v8 - -For CentOS 9+ we use CentOS Stream as the canonical base system. +For CentOS 9 and later, Fluent Bit uses [CentOS Stream](https://www.centos.org/centos-stream/) +as the canonical base system. ## Single line install -A simple installation script is provided to be used for most Linux targets. -This will always install the most recent version released. +Fluent Bit provides an installation script to use for most Linux targets. +This will always install the most recently released version. ```bash curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh ``` -This is purely a convenience helper and should always be validated prior to use. -The recommended secure deployment approach is to follow the instructions below. +This is a convenience helper and should always be validated prior to use. +The recommended secure deployment approach is to use the following instructions: ## CentOS 8 -CentOS 8 is now EOL so the default Yum repositories are unavailable. +CentOS 8 is now end-of-life, so the default Yum repositories are unavailable. -Make sure to configure to use an appropriate mirror, for example: +Ensure you've configured an appropriate mirror. For example: ```shell -$ sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \ - sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* +sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \ +sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* ``` -An alternative is to use Rocky or Alma Linux which _should_ be equivalent. +An alternative is to use Rocky or Alma Linux, which should be equivalent. ## Configure Yum -We provide **fluent-bit** through a Yum repository. In order to add the repository reference to your system, please add a new file called _fluent-bit.repo_ in _/etc/yum.repos.d/_ with the following content: +The `fluent-bit` is provided through a Yum repository. To add the repository +reference to your system: -```shell -[fluent-bit] -name = Fluent Bit -baseurl = https://packages.fluentbit.io/centos/$releasever/ -gpgcheck=1 -gpgkey=https://packages.fluentbit.io/fluentbit.key -repo_gpgcheck=1 -enabled=1 -``` +1. 
In `/etc/yum.repos.d/`, add a new file called `fluent-bit.repo`. +1. Add the following content to the file: -It is best practice to always enable the _gpgcheck_ and _repo\_gpgcheck_ for security reasons. We sign our repository metadata as well as all of our packages. + ```text + [fluent-bit] + name = Fluent Bit + baseurl = https://packages.fluentbit.io/centos/$releasever/ + gpgcheck=1 + gpgkey=https://packages.fluentbit.io/fluentbit.key + repo_gpgcheck=1 + enabled=1 + ``` + +1. As a best practice, enable `gpgcheck` and `repo_gpgcheck` for security reasons. + Fluent Bit signs its repository metadata and all Fluent Bit packages. ### Updated key from March 2022 -From the 1.9.0 and 1.8.15 releases please note that the GPG key has been updated at [https://packages.fluentbit.io/fluentbit.key](https://packages.fluentbit.io/fluentbit.key) so ensure this new one is added. +For the 1.9.0 and 1.8.15 and later releases, the +[GPG key has been updated](https://packages.fluentbit.io/fluentbit.key). Ensure +this new one is added. The GPG Key fingerprint of the new key is: @@ -63,7 +71,8 @@ C3C0 A285 34B9 293E AF51 FABD 9F9D DC08 3888 C1CD Fluentbit releases (Releases signing key) <releases@fluentbit.io> ``` -The previous key is still available at [https://packages.fluentbit.io/fluentbit-legacy.key](https://packages.fluentbit.io/fluentbit-legacy.key) and may be required to install previous versions. +The previous key is [still available](https://packages.fluentbit.io/fluentbit-legacy.key) +and might be required to install previous versions. The GPG Key fingerprint of the old key is: @@ -71,21 +80,22 @@ The GPG Key fingerprint of the old key is: F209 D876 2A60 CD49 E680 633B 4FF8 368B 6EA0 722A ``` -Refer to the [supported platform documentation](../supported-platforms.md) to see which platforms are supported in each release. +Refer to the [supported platform documentation](../supported-platforms.md) to see +which platforms are supported in each release. ### Install -Once your repository is configured, run the following command to install it: +1. After your repository is configured, run the following command to install it: -```bash -sudo yum install fluent-bit -``` + ```bash + sudo yum install fluent-bit + ``` -Now the following step is to instruct _Systemd_ to enable the service: +1. Instruct `Systemd` to enable the service: -```bash -sudo systemctl start fluent-bit -``` + ```bash + sudo systemctl start fluent-bit + ``` If you do a status check, you should see a similar output like this: @@ -100,13 +110,17 @@ $ systemctl status fluent-bit ... ``` -The default configuration of **fluent-bit** is collecting metrics of CPU usage and sending the records to the standard output, you can see the outgoing data in your _/var/log/messages_ file. +The default Fluent Bit configuration collect metrics of CPU usage and sends the +records to the standard output. You can see the outgoing data in your +`/var/log/messages` file. ## FAQ ### Yum install fails with a "404 - Page not found" error for the package mirror -The fluent-bit.repo file for the latest installations of Fluent-Bit uses a $releasever variable to determine the correct version of the package to install to your system: +The `fluent-bit.repo` file for the latest installations of Fluent Bit uses a +`$releasever` variable to determine the correct version of the package to install to +your system: ```text [fluent-bit] @@ -115,9 +129,13 @@ baseurl = https://packages.fluentbit.io/centos/$releasever/$basearch/ ... 
``` -Depending on your Red Hat distribution version, this variable may return a value other than the OS major release version (e.g., RHEL7 Server distributions return "7Server" instead of just "7"). The Fluent-Bit package url uses just the major OS release version, so any other value here will cause a 404. +Depending on your Red Hat distribution version, this variable can return a value +other than the OS major release version (for example, RHEL7 Server distributions return +`7Server` instead of `7`). The Fluent Bit package URL uses the major OS +release version, so any other value here will cause a 404. -In order to resolve this issue, you can replace the $releasever variable with your system's OS major release version. For example: +To resolve this issue, replace the `$releasever` variable with your system's OS major +release version. For example: ```text [fluent-bit] diff --git a/installation/linux/ubuntu.md b/installation/linux/ubuntu.md index 5e8f20755..11ad94cdd 100644 --- a/installation/linux/ubuntu.md +++ b/installation/linux/ubuntu.md @@ -1,10 +1,12 @@ # Ubuntu -Fluent Bit is distributed as **fluent-bit** package and is available for the latest stable Ubuntu system: Jammy Jellyfish. +Fluent Bit is distributed as the `fluent-bit` package and is available for long-term +support releases of Ubuntu. The latest officially supported version is Noble Numbat +(24.04). ## Single line install -A simple installation script is provided to be used for most Linux targets. +An installation script is provided for most Linux targets. This will always install the most recent version released. ```bash @@ -12,12 +14,15 @@ curl https://raw.githubusercontent.com/fluent/fluent-bit/master/install.sh | sh ``` This is purely a convenience helper and should always be validated prior to use. -The recommended secure deployment approach is to follow the instructions below. +The recommended secure deployment approach is to use the following instructions. ## Server GPG key -The first step is to add our server GPG key to your keyring to ensure you can get our signed packages. -Follow the official Debian wiki guidance: <https://wiki.debian.org/DebianRepository/UseThirdParty#OpenPGP\_Key\_distribution> +The first step is to add the Fluent Bit server GPG key to your keyring to ensure +you can get the correct signed packages. + +Follow the official +[Debian wiki guidance](https://wiki.debian.org/DebianRepository/UseThirdParty#OpenPGP_Key_distribution). ```bash curl https://packages.fluentbit.io/fluentbit.key | gpg --dearmor > /usr/share/keyrings/fluentbit-keyring.gpg @@ -25,7 +30,9 @@ curl https://packages.fluentbit.io/fluentbit.key | gpg --dearmor > /usr/share/ke ### Updated key from March 2022 -From the 1.9.0 and 1.8.15 releases please note that the GPG key has been updated at [https://packages.fluentbit.io/fluentbit.key](https://packages.fluentbit.io/fluentbit.key) so ensure this new one is added. +For releases 1.9.0 and 1.8.15 and later, the +[GPG key has been updated](https://packages.fluentbit.io/fluentbit.key). Ensure +the new key is added. The GPG Key fingerprint of the new key is: @@ -34,7 +41,8 @@ C3C0 A285 34B9 293E AF51 FABD 9F9D DC08 3888 C1CD Fluentbit releases (Releases signing key) <releases@fluentbit.io> ``` -The previous key is still available at [https://packages.fluentbit.io/fluentbit-legacy.key](https://packages.fluentbit.io/fluentbit-legacy.key) and may be required to install previous versions. 
+The previous key is [still available](https://packages.fluentbit.io/fluentbit-legacy.key) +and might be required to install previous versions. The GPG Key fingerprint of the old key is: @@ -42,11 +50,15 @@ The GPG Key fingerprint of the old key is: F209 D876 2A60 CD49 E680 633B 4FF8 368B 6EA0 722A ``` -Refer to the [supported platform documentation](../supported-platforms.md) to see which platforms are supported in each release. +Refer to the [supported platform documentation](../supported-platforms.md) to see +which platforms are supported in each release. ## Update your sources lists -On Ubuntu, you need to add our APT server entry to your sources lists, please add the following content at bottom of your **/etc/apt/sources.list** file - ensure to set `CODENAME` to your specific [Ubuntu release name](https://wiki.ubuntu.com/Releases) (e.g. `focal` for Ubuntu 20.04): +On Ubuntu, you need to add the Fluent Bit APT server entry to your sources lists. +Add the following content at bottom of your `/etc/apt/sources.list` file. Ensure +`CODENAME` is set to your specific [Ubuntu release name](https://wiki.ubuntu.com/Releases). +For example, `focal` for Ubuntu 20.04. ```bash deb [signed-by=/usr/share/keyrings/fluentbit-keyring.gpg] https://packages.fluentbit.io/ubuntu/${CODENAME} ${CODENAME} main @@ -54,33 +66,38 @@ deb [signed-by=/usr/share/keyrings/fluentbit-keyring.gpg] https://packages.fluen ### Update your repositories database -Now let your system update the _apt_ database: +Update the `apt` database on your system: ```bash sudo apt-get update ``` {% hint style="info" %} -We recommend upgrading your system (`sudo apt-get upgrade`). This could avoid potential issues with expired certificates. -{% endhint %} +Fluent Bit recommends upgrading your system to avoid potential issues +with expired certificates: -{% hint style="info" %} -If you have the following error "Certificate verification failed", you might want to check if the package `ca-certificates` is properly installed (`sudo apt-get install ca-certificates`). +`sudo apt-get upgrade` + + +If you receive the error `Certificate verification failed`, check if the package +`ca-certificates` is properly installed: + +`sudo apt-get install ca-certificates` {% endhint %} ## Install Fluent Bit -Using the following _apt-get_ command you are able now to install the latest _fluent-bit_: +1. Use the following `apt-get` command to install the latest Fluent Bit: -```text -sudo apt-get install fluent-bit -``` + ```bash copy + sudo apt-get install fluent-bit + ``` -Now the following step is to instruct _systemd_ to enable the service: +1. Instruct `systemd` to enable the service: -```bash -sudo systemctl start fluent-bit -``` + ```bash copy + sudo systemctl start fluent-bit + ``` If you do a status check, you should see a similar output like this: @@ -98,4 +115,6 @@ systemctl status fluent-bit ... ``` -The default configuration of **fluent-bit** is collecting metrics of CPU usage and sending the records to the standard output, you can see the outgoing data in your _/var/log/syslog_ file. +The default configuration of `fluent-bit` is collecting metrics of CPU usage and +sending the records to the standard output. You can see the outgoing data in your +`/var/log/syslog` file. diff --git a/installation/macos.md b/installation/macos.md index b872b4509..1772b3091 100644 --- a/installation/macos.md +++ b/installation/macos.md @@ -1,30 +1,31 @@ # macOS -Fluent Bit is compatible with latest Apple macOS system on x86_64 and Apple Silicon M1 architectures. 
-At the moment there is only an official supported package on x86_64 but you can build it from source as well by following the instructions below. +Fluent Bit is compatible with the latest Apple macOS software for x86_64 and +Apple Silicon architectures. -## Installation Packages +## Installation packages -The packages can be found here: <https://packages.fluentbit.io/macos/> +Installation packages can be found [here](https://packages.fluentbit.io/macos/). ## Requirements -For the next steps, you will need to have [Homebrew](https://brew.sh/) installed in your system. -If is not there, you can install it with the following command: +You must have [Homebrew](https://brew.sh/) installed in your system. +If it isn't present, install it with the following command: -```bash +```bash copy /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" ``` ## Installing from Homebrew -The Fluent Bit package on Homebrew is not officially supported, but should work for basic use cases and testing. It can be installed using: +The Fluent Bit package on Homebrew isn't officially supported, but should work for +basic use cases and testing. It can be installed using: -```bash +```bash copy brew install fluent-bit ``` -## Compile from Source +## Compile from source ### Install build dependencies @@ -34,97 +35,101 @@ Run the following brew command in your terminal to retrieve the dependencies: brew install git cmake openssl bison ``` -## Get the source and build it +## Download and build the source -Grab a fresh copy of the Fluent Bit source code (upstream): +1. Download a copy of the Fluent Bit source code (upstream): -```bash -git clone https://github.com/fluent/fluent-bit -cd fluent-bit -``` + ```bash + git clone https://github.com/fluent/fluent-bit + cd fluent-bit + ``` -Optionally, if you want to use a specific version, just checkout to the proper tag. -If you want to use `v1.8.13` just do: + If you want to use a specific version, checkout to the proper tag. + For example, to use `v1.8.13`, use the command: -```bash -git checkout v1.8.13 -``` + ```bash copy + git checkout v1.8.13 + ``` -In order to prepare the build system, we need to expose certain environment variables so Fluent Bit CMake build rules can pick the right libraries: +1. To prepare the build system, you must expose certain environment variables so + Fluent Bit CMake build rules can pick the right libraries: -```bash -export OPENSSL_ROOT_DIR=`brew --prefix openssl` -export PATH=`brew --prefix bison`/bin:$PATH -``` + ```bash copy + export OPENSSL_ROOT_DIR=`brew --prefix openssl` + export PATH=`brew --prefix bison`/bin:$PATH + ``` -Change to the _build/_ directory inside the Fluent Bit sources: +1. Change to the `build/` directory inside the Fluent Bit sources: -```bash -cd build/ -``` + ```bash + cd build/ + ``` -Build Fluent Bit. -Note that we are indicating to the build system "where" the final binaries and config files should be installed: +1. Build Fluent Bit. This example indicates to the build system the location + the final binaries and `config` files should be installed: -```bash -cmake -DFLB_DEV=on -DCMAKE_INSTALL_PREFIX=/opt/fluent-bit ../ -make -j 16 -``` + ```bash + cmake -DFLB_DEV=on -DCMAKE_INSTALL_PREFIX=/opt/fluent-bit ../ + make -j 16 + ``` -Install Fluent Bit to the directory specified above. -Note that this requires root privileges due to the directory we will write information to: +1. Install Fluent Bit to the previously specified directory. 
+ Writing to this directory requires root privileges. -```bash -sudo make install -``` + ```bash + sudo make install + ``` The binaries and configuration examples can be located at `/opt/fluent-bit/`. ## Create macOS installer from source -Grab a fresh copy of the Fluent Bit source code (upstream): +1. Clone the Fluent Bit source code (upstream): -```bash -git clone https://github.com/fluent/fluent-bit -cd fluent-bit -``` + ```bash + git clone https://github.com/fluent/fluent-bit + cd fluent-bit + ``` -Optionally, if you want to use a specific version, just checkout to the proper tag. If you want to use `v1.9.2` just do: + If you want to use a specific version, checkout to the proper tag. For example, + to use `v1.9.2` do: -```bash -git checkout v1.9.2 -``` + ```bash + git checkout v1.9.2 + ``` -In order to prepare the build system, we need to expose certain environment variables so Fluent Bit CMake build rules can pick the right libraries: +1. To prepare the build system, you must expose certain environment variables so + Fluent Bit CMake build rules can pick the right libraries: -```bash -export OPENSSL_ROOT_DIR=`brew --prefix openssl` -export PATH=`brew --prefix bison`/bin:$PATH -``` + ```bash copy + export OPENSSL_ROOT_DIR=`brew --prefix openssl` + export PATH=`brew --prefix bison`/bin:$PATH + ``` -And then, creating the specific macOS SDK target (For example, specifying macOS Big Sur (11.3) SDK environment): +1. Create the specific macOS SDK target. For example, to specify macOS Big Sur + (11.3) SDK environment: -```bash -export MACOSX_DEPLOYMENT_TARGET=11.3 -``` + ```bash copy + export MACOSX_DEPLOYMENT_TARGET=11.3 + ``` -Change to the _build/_ directory inside the Fluent Bit sources: +1. Change to the `build/` directory inside the Fluent Bit sources: -```bash -cd build/ -``` + ```bash copy + cd build/ + ``` -Build the Fluent Bit macOS installer. +1. Build the Fluent Bit macOS installer: -```bash -cmake -DCPACK_GENERATOR=productbuild -DCMAKE_INSTALL_PREFIX=/opt/fluent-bit ../ -make -j 16 -cpack -G productbuild -``` + ```bash copy + cmake -DCPACK_GENERATOR=productbuild -DCMAKE_INSTALL_PREFIX=/opt/fluent-bit ../ + make -j 16 + cpack -G productbuild + ``` -Then, macOS installer will be generated as: +The macOS installer will be generated as: -```log +```text CPack: Create package using productbuild CPack: Install projects CPack: - Run preinstall target for: fluent-bit @@ -141,27 +146,29 @@ CPack: - Building component package: /Users/fluent-bit-builder/GitHub/fluent-b CPack: - package: /Users/fluent-bit-builder/GitHub/fluent-bit/build/fluent-bit-1.9.2-apple.pkg generated. ``` -Finally, fluent-bit-`<fluent-bit version>`-`(intel or apple)`.pkg will be generated. +Finally, the `fluent-bit-<fluent-bit version>-(intel or apple)`.pkg will be generated. The created installer will put binaries at `/opt/fluent-bit/`. 
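If you prefer to install the generated package from the command line instead of opening it in Finder, you can use the macOS `installer` utility. This is a minimal sketch that assumes the `fluent-bit-1.9.2-apple.pkg` file name shown in the example output above; substitute the package name your build produced.

```bash
# Install the package produced by cpack; binaries land under /opt/fluent-bit/.
# Root privileges are required because it writes outside your home directory.
sudo installer -pkg fluent-bit-1.9.2-apple.pkg -target /
```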
## Running Fluent Bit -To make the access path easier to Fluent Bit binary, in your terminal extend the `PATH` variable: +To make the access path easier to Fluent Bit binary, extend the `PATH` variable: -```bash +```bash copy export PATH=/opt/fluent-bit/bin:$PATH ``` -Now as a simple test, try Fluent Bit by generating a simple dummy message which will be printed to the standard output interface every 1 second: +To test, try Fluent Bit by generating a test message using the +[Dummy input plugin](https://docs.fluentbit.io/manual/pipeline/inputs/dummy) +which prints to the standard output interface every one second: -```bash - fluent-bit -i dummy -o stdout -f 1 +```bash copy +fluent-bit -i dummy -o stdout -f 1 ``` You will see an output similar to this: -```bash +```text Fluent Bit v1.9.0 * Copyright (C) 2015-2021 The Fluent Bit Authors * Fluent Bit is a CNCF sub-project under the umbrella of Fluentd diff --git a/installation/requirements.md b/installation/requirements.md index efaf1ada0..fe4399b27 100644 --- a/installation/requirements.md +++ b/installation/requirements.md @@ -1,12 +1,20 @@ # Requirements -[Fluent Bit](http://fluentbit.io) uses very low CPU and Memory consumption, it's compatible with most of x86, x86\_64, arm32v7, arm64v8 based platforms. In order to build it you need the following components in your system for the build process: +[Fluent Bit](http://fluentbit.io) has very low CPU and memory consumption. It's +compatible with most x86-, x86_64-, arm32v7-, and arm64v8-based platforms. -* Compiler: GCC or clang -* CMake -* Flex & Bison: only if you enable the Stream Processor or Record Accessor feature \(both enabled by default\) -* Libyaml development headers and libraries +The build process requires the following components: -In the core there are not other dependencies, For certain features that depends on third party components like output plugins with special backend libraries \(e.g: kafka\), those are included in the main source code repository. +- Compiler: GCC or clang +- CMake +- Flex and Bison: Required for + [Stream Processor](https://docs.fluentbit.io/manual/stream-processing/introduction) + or [Record Accessor](https://docs.fluentbit.io/manual/administration/configuring-fluent-bit/classic-mode/record-accessor) +- Libyaml development headers and libraries -Fluent Bit is supported on Linux on IBM Z(s390x), but the WASM and LUA filter plugins are not. +Core has no other dependencies. Some features depend on third-party components. +For example, output plugins with special backend libraries like Kafka include those +libraries in the main source code repository. + +Fluent Bit is supported on Linux on IBM Z(s390x), but the WASM and LUA filter +plugins aren't. diff --git a/installation/sources/build-and-install.md b/installation/sources/build-and-install.md index 4453ce96e..c30e2c115 100644 --- a/installation/sources/build-and-install.md +++ b/installation/sources/build-and-install.md @@ -1,214 +1,243 @@ -# Build and Install +# Build and install -[Fluent Bit](http://fluentbit.io) uses [CMake](http://cmake.org) as its build system. The suggested procedure to prepare the build system consists of the following steps: +[Fluent Bit](http://fluentbit.io) uses [CMake](http://cmake.org) as its build system. ## Requirements -- CMake >= 3.12 +- CMake 3.12 or greater. You might need to use `cmake3` instead of `cmake`. 
- Flex -- Bison >= 3 +- Bison 3 or greater - YAML headers - OpenSSL headers ## Prepare environment -> In the following steps you can find exact commands to build and install the project with the default options. If you already know how CMake works you can skip this part and look at the build options available. Note that Fluent Bit requires CMake 3.x. You may need to use `cmake3` instead of `cmake` to complete the following steps on your system. - -Change to the _build/_ directory inside the Fluent Bit sources: - -```bash -$ cd build/ -``` - -Let [CMake](http://cmake.org) configure the project specifying where the root path is located: - -```bash -$ cmake ../ --- The C compiler identification is GNU 4.9.2 --- Check for working C compiler: /usr/bin/cc --- Check for working C compiler: /usr/bin/cc -- works --- Detecting C compiler ABI info --- Detecting C compiler ABI info - done --- The CXX compiler identification is GNU 4.9.2 --- Check for working CXX compiler: /usr/bin/c++ --- Check for working CXX compiler: /usr/bin/c++ -- works -... --- Could NOT find Doxygen (missing: DOXYGEN_EXECUTABLE) --- Looking for accept4 --- Looking for accept4 - not found --- Configuring done --- Generating done --- Build files have been written to: /home/edsiper/coding/fluent-bit/build -``` - -Now you are ready to start the compilation process through the simple _make_ command: - -```bash -$ make -Scanning dependencies of target msgpack -[ 2%] Building C object lib/msgpack-1.1.0/CMakeFiles/msgpack.dir/src/unpack.c.o -[ 4%] Building C object lib/msgpack-1.1.0/CMakeFiles/msgpack.dir/src/objectc.c.o -[ 7%] Building C object lib/msgpack-1.1.0/CMakeFiles/msgpack.dir/src/version.c.o -... -[ 19%] Building C object lib/monkey/mk_core/CMakeFiles/mk_core.dir/mk_file.c.o -[ 21%] Building C object lib/monkey/mk_core/CMakeFiles/mk_core.dir/mk_rconf.c.o -[ 23%] Building C object lib/monkey/mk_core/CMakeFiles/mk_core.dir/mk_string.c.o -... -Scanning dependencies of target fluent-bit-static -[ 66%] Building C object src/CMakeFiles/fluent-bit-static.dir/flb_pack.c.o -[ 69%] Building C object src/CMakeFiles/fluent-bit-static.dir/flb_input.c.o -[ 71%] Building C object src/CMakeFiles/fluent-bit-static.dir/flb_output.c.o -... -Linking C executable ../bin/fluent-bit -[100%] Built target fluent-bit-bin -``` - -to continue installing the binary on the system just do: - -```bash -$ make install -``` - -it's likely you may need root privileges so you can try to prefixing the command with _sudo_. - -## Build Options - -Fluent Bit provides certain options to CMake that can be enabled or disabled when configuring, please refer to the following tables under the _General Options_, _Development Options_, Input Plugins _and \_Output Plugins_ sections. - -### General Options - -| option | description | default | +If you already know how CMake works, you can skip this section and review the +available [build options](#general-options). + +The following steps explain how to build and install the project with the default +options. + +1. Change to the `build/` directory inside the Fluent Bit sources: + + ```bash + cd build/ + ``` + +1. 
Let [CMake](http://cmake.org) configure the project specifying where the root + path is located: + + ```bash + cmake ../ + ``` + + This command displays a series of results similar to: + + ```text + -- The C compiler identification is GNU 4.9.2 + -- Check for working C compiler: /usr/bin/cc + -- Check for working C compiler: /usr/bin/cc -- works + -- Detecting C compiler ABI info + -- Detecting C compiler ABI info - done + -- The CXX compiler identification is GNU 4.9.2 + -- Check for working CXX compiler: /usr/bin/c++ + -- Check for working CXX compiler: /usr/bin/c++ -- works + ... + -- Could NOT find Doxygen (missing: DOXYGEN_EXECUTABLE) + -- Looking for accept4 + -- Looking for accept4 - not found + -- Configuring done + -- Generating done + -- Build files have been written to: /home/edsiper/coding/fluent-bit/build + ``` + +1. Start the compilation process using the `make` command: + + ```bash + make + ``` + + This command displays results similar to: + + ```text + Scanning dependencies of target msgpack + [ 2%] Building C object lib/msgpack-1.1.0/CMakeFiles/msgpack.dir/src/unpack.c.o + [ 4%] Building C object lib/msgpack-1.1.0/CMakeFiles/msgpack.dir/src/objectc.c.o + [ 7%] Building C object lib/msgpack-1.1.0/CMakeFiles/msgpack.dir/src/version.c.o + ... + [ 19%] Building C object lib/monkey/mk_core/CMakeFiles/mk_core.dir/mk_file.c.o + [ 21%] Building C object lib/monkey/mk_core/CMakeFiles/mk_core.dir/mk_rconf.c.o + [ 23%] Building C object lib/monkey/mk_core/CMakeFiles/mk_core.dir/mk_string.c.o + ... + Scanning dependencies of target fluent-bit-static + [ 66%] Building C object src/CMakeFiles/fluent-bit-static.dir/flb_pack.c.o + [ 69%] Building C object src/CMakeFiles/fluent-bit-static.dir/flb_input.c.o + [ 71%] Building C object src/CMakeFiles/fluent-bit-static.dir/flb_output.c.o + ... + Linking C executable ../bin/fluent-bit + [100%] Built target fluent-bit-bin + ``` + +1. To continue installing the binary on the system, use `make install`: + + ```bash + make install + ``` + + If the command indicates insufficient permissions, prefix the command with `sudo`. + +## Build options + +Fluent Bit provides configurable options to CMake that can be enabled or disabled. + +### General options + +| Option | Description | Default | | :--- | :--- | :--- | -| FLB\_ALL | Enable all features available | No | -| FLB\_JEMALLOC | Use Jemalloc as default memory allocator | No | -| FLB\_TLS | Build with SSL/TLS support | Yes | -| FLB\_BINARY | Build executable | Yes | -| FLB\_EXAMPLES | Build examples | Yes | -| FLB\_SHARED\_LIB | Build shared library | Yes | -| FLB\_MTRACE | Enable mtrace support | No | -| FLB\_INOTIFY | Enable Inotify support | Yes | -| FLB\_POSIX\_TLS | Force POSIX thread storage | No | -| FLB\_SQLDB | Enable SQL embedded database support | No | -| FLB\_HTTP\_SERVER | Enable HTTP Server | No | -| FLB\_LUAJIT | Enable Lua scripting support | Yes | -| FLB\_RECORD\_ACCESSOR | Enable record accessor | Yes | -| FLB\_SIGNV4 | Enable AWS Signv4 support | Yes | -| FLB\_STATIC\_CONF | Build binary using static configuration files. The value of this option must be a directory containing configuration files. 
| | -| FLB\_STREAM\_PROCESSOR | Enable Stream Processor | Yes | -| FLB\_CONFIG\_YAML | Enable YAML configuration support | Yes | -| FLB\_WASM | Build with WASM runtime support | Yes | -| FLB\_WAMRC | Build with WASM AOT compiler executable | No | - -### Development Options - -| option | description | default | +| `FLB_ALL` | Enable all features available | _No_ | +| `FLB_JEMALLOC` | Use Jemalloc as default memory allocator | _No_ | +| `FLB_TLS` | Build with SSL/TLS support | _Yes_ | +| `FLB_BINARY` | Build executable | _Yes_ | +| `FLB_EXAMPLES` | Build examples | _Yes_ | +| `FLB_SHARED_LIB` | Build shared library | _Yes_ | +| `FLB_MTRACE` | Enable mtrace support | _No_ | +| `FLB_INOTIFY` | Enable Inotify support | _Yes_ | +| `FLB_POSIX_TLS` | Force POSIX thread storage | _No_ | +| `FLB_SQLDB` | Enable SQL embedded database support | _No_ | +| `FLB_HTTP_SERVER` | Enable HTTP Server | _No_ | +| `FLB_LUAJIT` | Enable Lua scripting support | _Yes_ | +| `FLB_RECORD_ACCESSOR` | Enable record accessor | _Yes_ | +| `FLB_SIGNV4` | Enable AWS Signv4 support | _Yes_ | +| `FLB_STATIC_CONF` | Build binary using static configuration files. The value of this option must be a directory containing configuration files. | | +| `FLB_STREAM_PROCESSOR` | Enable Stream Processor | _Yes_ | +| `FLB_CONFIG_YAML` | Enable YAML configuration support | _Yes_ | +| `FLB_WASM` | Build with WASM runtime support | _Yes_ | +| `FLB_WAMRC` | Build with WASM AOT compiler executable | _No_ | + +### Development options + +| Option | Description | Default | | :--- | :--- | :--- | -| FLB\_DEBUG | Build binaries with debug symbols | No | -| FLB\_VALGRIND | Enable Valgrind support | No | -| FLB\_TRACE | Enable trace mode | No | -| FLB\_SMALL | Minimise binary size | No | -| FLB\_TESTS\_RUNTIME | Enable runtime tests | No | -| FLB\_TESTS\_INTERNAL | Enable internal tests | No | -| FLB\_TESTS | Enable tests | No | -| FLB\_BACKTRACE | Enable backtrace/stacktrace support | Yes | - -### Optimization Options - -| option | description | default | +| `FLB_DEBUG` | Build binaries with debug symbols | _No_ | +| `FLB_VALGRIND` | Enable Valgrind support | _No_ | +| `FLB_TRACE` | Enable trace mode | _No_ | +| `FLB_SMALL` | Minimise binary size | _No_ | +| `FLB_TESTS_RUNTIME` | Enable runtime tests | _No_ | +| `FLB_TESTS_INTERNAL` | Enable internal tests | _No_ | +| `FLB_TESTS` | Enable tests | _No_ | +| `FLB_BACKTRACE` | Enable backtrace/stacktrace support | _Yes_ | + +### Optimization options + +| Option | Description | Default | | :--- | :--- | :--- | -| FLB\_MSGPACK\_TO\_JSON\_INIT\_BUFFER\_SIZE | Determine initial buffer size for msgpack to json conversion in terms of memory used by payload. | 2.0 | -| FLB\_MSGPACK\_TO\_JSON\_REALLOC\_BUFFER\_SIZE | Determine percentage of reallocation size when msgpack to json conversion buffer runs out of memory. | 0.1 | +| `FLB_MSGPACK_TO_JSON_INIT_BUFFER_SIZE` | Determine initial buffer size for `msgpack` to `json` conversion in terms of memory used by payload. | `2.0` | +| `FLB_MSGPACK_TO_JSON_REALLOC_BUFFER_SIZE` | Determine percentage of reallocation size when `msgpack` to `json` conversion buffer runs out of memory. 
| `0.1` | -### Input Plugins +### Input plugins -The _input plugins_ provides certain features to gather information from a specific source type which can be a network interface, some built-in metric or through a specific input device, the following input plugins are available: +Input plugins gather information from a specific source type like network interfaces, +some built-in metrics, or through a specific input device. The following input plugins +are available: -| option | description | default | +| Option | Description | Default | | :--- | :--- | :--- | -| [FLB\_IN\_COLLECTD](../../pipeline/inputs/collectd.md) | Enable Collectd input plugin | On | -| [FLB\_IN\_CPU](../../pipeline/inputs/cpu-metrics.md) | Enable CPU input plugin | On | -| [FLB\_IN\_DISK](../../pipeline/inputs/disk-io-metrics.md) | Enable Disk I/O Metrics input plugin | On | -| [FLB\_IN\_DOCKER](../../pipeline/inputs/docker-events.md) | Enable Docker metrics input plugin | On | -| [FLB\_IN\_EXEC](../../pipeline/inputs/exec.md) | Enable Exec input plugin | On | -| [FLB\_IN\_EXEC\_WASI](../../pipeline/inputs/exec-wasi.md) | Enable Exec WASI input plugin | On | -| [FLB_IN_FLUENTBIT_METRICS](../../pipeline/inputs/fluentbit-metrics.md) | Enable Fluent Bit metrics input plugin | On | -| [FLB\_IN\_ELASTICSEARCH](../../pipeline/inputs/elasticsearch.md) | Enable Elasticsearch/OpenSearch Bulk input plugin | On | -| [FLB\_IN\_FORWARD](../../pipeline/inputs/forward.md) | Enable Forward input plugin | On | -| [FLB\_IN\_HEAD](../../pipeline/inputs/head.md) | Enable Head input plugin | On | -| [FLB\_IN\_HEALTH](../../pipeline/inputs/health.md) | Enable Health input plugin | On | -| [FLB\_IN\_KMSG](../../pipeline/inputs/kernel-logs.md) | Enable Kernel log input plugin | On | -| [FLB\_IN\_MEM](../../pipeline/inputs/memory-metrics.md) | Enable Memory input plugin | On | -| [FLB\_IN\_MQTT](../../pipeline/inputs/mqtt.md) | Enable MQTT Server input plugin | On | -| [FLB\_IN\_NETIF](../../pipeline/inputs/network-io-metrics.md) | Enable Network I/O metrics input plugin | On | -| [FLB\_IN\_PROC](../../pipeline/inputs/process.md) | Enable Process monitoring input plugin | On | -| [FLB\_IN\_RANDOM](../../pipeline/inputs/random.md) | Enable Random input plugin | On | -| [FLB\_IN\_SERIAL](../../pipeline/inputs/serial-interface.md) | Enable Serial input plugin | On | -| [FLB\_IN\_STDIN](../../pipeline/inputs/standard-input.md) | Enable Standard input plugin | On | -| [FLB\_IN\_SYSLOG](../../pipeline/inputs/syslog.md) | Enable Syslog input plugin | On | -| [FLB\_IN\_SYSTEMD](../../pipeline/inputs/systemd.md) | Enable Systemd / Journald input plugin | On | -| [FLB\_IN\_TAIL](../../pipeline/inputs/tail.md) | Enable Tail \(follow files\) input plugin | On | -| [FLB\_IN\_TCP](../../pipeline/inputs/tcp.md) | Enable TCP input plugin | On | -| [FLB\_IN\_THERMAL](../../pipeline/inputs/thermal.md) | Enable system temperature\(s\) input plugin | On | -| [FLB\_IN\_UDP](../../pipeline/inputs/udp.md) | Enable UDP input plugin | On | -| [FLB\_IN\_WINLOG](../../pipeline/inputs/windows-event-log.md) | Enable Windows Event Log input plugin \(Windows Only\) | On | -| [FLB\_IN\_WINEVTLOG](../../pipeline/inputs/windows-event-log-winevtlog.md) | Enable Windows Event Log input plugin using winevt.h API \(Windows Only\) | On | - -### Filter Plugins - -The _filter plugins_ allows to modify, enrich or drop records. 
The following table describes the filters available on this version: - -| option | description | default | +| [`FLB_IN_COLLECTD`](../../pipeline/inputs/collectd.md) | Enable Collectd input plugin | _On_ | +| [`FLB_IN_CPU`](../../pipeline/inputs/cpu-metrics.md) | Enable CPU input plugin | _On_ | +| [`FLB_IN_DISK`](../../pipeline/inputs/disk-io-metrics.md) | Enable Disk I/O Metrics input plugin | _On_ | +| [`FLB_IN_DOCKER`](../../pipeline/inputs/docker-events.md) | Enable Docker metrics input plugin | _On_ | +| [`FLB_IN_EXEC`](../../pipeline/inputs/exec.md) | Enable Exec input plugin | _On_ | +| [`FLB_IN_EXEC_WASI`](../../pipeline/inputs/exec-wasi.md) | Enable Exec WASI input plugin | _On_ | +| [`FLB_IN_FLUENTBIT_METRICS`](../../pipeline/inputs/fluentbit-metrics.md) | Enable Fluent Bit metrics input plugin | _On_ | +| [`FLB_IN_ELASTICSEARCH`](../../pipeline/inputs/elasticsearch.md) | Enable Elasticsearch/OpenSearch Bulk input plugin | _On_ | +| [`FLB_IN_FORWARD`](../../pipeline/inputs/forward.md) | Enable Forward input plugin | _On_ | +| [`FLB_IN_HEAD`](../../pipeline/inputs/head.md) | Enable Head input plugin | _On_ | +| [`FLB_IN_HEALTH`](../../pipeline/inputs/health.md) | Enable Health input plugin | _On_ | +| [`FLB_IN_KMSG`](../../pipeline/inputs/kernel-logs.md) | Enable Kernel log input plugin | _On_ | +| [`FLB_IN_MEM`](../../pipeline/inputs/memory-metrics.md) | Enable Memory input plugin | _On_ | +| [`FLB_IN_MQTT`](../../pipeline/inputs/mqtt.md) | Enable MQTT Server input plugin | _On_ | +| [`FLB_IN_NETIF`](../../pipeline/inputs/network-io-metrics.md) | Enable Network I/O metrics input plugin | _On_ | +| [`FLB_IN_PROC`](../../pipeline/inputs/process.md) | Enable Process monitoring input plugin | _On_ | +| [`FLB_IN_RANDOM`](../../pipeline/inputs/random.md) | Enable Random input plugin | _On_ | +| [`FLB_IN_SERIAL`](../../pipeline/inputs/serial-interface.md) | Enable Serial input plugin | _On_ | +| [`FLB_IN_STDIN`](../../pipeline/inputs/standard-input.md) | Enable Standard input plugin | _On_ | +| [`FLB_IN_SYSLOG`](../../pipeline/inputs/syslog.md) | Enable Syslog input plugin | _On_ | +| [`FLB_IN_SYSTEMD`](../../pipeline/inputs/systemd.md) | Enable Systemd / Journald input plugin | _On_ | +| [`FLB_IN_TAIL`](../../pipeline/inputs/tail.md) | Enable Tail (follow files) input plugin | _On_ | +| [`FLB_IN_TCP`](../../pipeline/inputs/tcp.md) | Enable TCP input plugin | _On_ | +| [`FLB_IN_THERMAL`](../../pipeline/inputs/thermal.md) | Enable system temperature input plugin | _On_ | +| [`FLB_IN_UDP`](../../pipeline/inputs/udp.md) | Enable UDP input plugin | _On_ | +| [`FLB_IN_WINLOG`](../../pipeline/inputs/windows-event-log.md) | Enable Windows Event Log input plugin (Windows Only) | _On_ | +| [`FLB_IN_WINEVTLOG`](../../pipeline/inputs/windows-event-log-winevtlog.md) | Enable Windows Event Log input plugin using `winevt.h` API (Windows Only) | _On_ | + +### Filter plugins + +Filter plugins let you modify, enrich or drop records. 
The following table describes +the filters available on this version: + +| Option | Description | Default | | :--- | :--- | :--- | -| [FLB\_FILTER\_AWS](../../pipeline/filters/aws-metadata.md) | Enable AWS metadata filter | On | -| [FLB\_FILTER\_ECS](../../pipeline/filters/ecs-metadata.md) | Enable AWS metadata filter | On | -| FLB\_FILTER\_EXPECT | Enable Expect data test filter | On | -| [FLB\_FILTER\_GREP](../../pipeline/filters/grep.md) | Enable Grep filter | On | -| [FLB\_FILTER\_KUBERNETES](../../pipeline/filters/kubernetes.md) | Enable Kubernetes metadata filter | On | -| [FLB\_FILTER\_LUA](../../pipeline/filters/lua.md) | Enable Lua scripting filter | On | -| [FLB\_FILTER\_MODIFY](../../pipeline/filters/modify.md) | Enable Modify filter | On | -| [FLB\_FILTER\_NEST](../../pipeline/filters/nest.md) | Enable Nest filter | On | -| [FLB\_FILTER\_PARSER](../../pipeline/filters/parser.md) | Enable Parser filter | On | -| [FLB\_FILTER\_RECORD\_MODIFIER](../../pipeline/filters/record-modifier.md) | Enable Record Modifier filter | On | -| [FLB\_FILTER\_REWRITE\_TAG](../../pipeline/filters/rewrite-tag.md) | Enable Rewrite Tag filter | On | -| [FLB\_FILTER\_STDOUT](../../pipeline/filters/standard-output.md) | Enable Stdout filter | On | -| [FLB\_FILTER\_SYSINFO](../../pipeline/filters/sysinfo.md) | Enable Sysinfo filter | On | -| [FLB\_FILTER\_THROTTLE](../../pipeline/filters/throttle.md) | Enable Throttle filter | On | -| [FLB\_FILTER\_TYPE\_CONVERTER](../../pipeline/filters/type-converter.md) | Enable Type Converter filter | On | -| [FLB\_FILTER\_WASM](../../pipeline/filters/wasm.md) | Enable WASM filter | On | - -### Output Plugins - -The _output plugins_ gives the capacity to flush the information to some external interface, service or terminal, the following table describes the output plugins available as of this version: - -| option | description | default | +| [`FLB_FILTER_AWS`](../../pipeline/filters/aws-metadata.md) | Enable AWS metadata filter | _On_ | +| [`FLB_FILTER_ECS`](../../pipeline/filters/ecs-metadata.md) | Enable AWS metadata filter | _On_ | +| `FLB_FILTER_EXPECT` | Enable Expect data test filter | _On_ | +| [`FLB_FILTER_GREP`](../../pipeline/filters/grep.md) | Enable Grep filter | _On_ | +| [`FLB_FILTER_KUBERNETES`](../../pipeline/filters/kubernetes.md) | Enable Kubernetes metadata filter | _On_ | +| [`FLB_FILTER_LUA`](../../pipeline/filters/lua.md) | Enable Lua scripting filter | _On_ | +| [`FLB_FILTER_MODIFY`](../../pipeline/filters/modify.md) | Enable Modify filter | _On_ | +| [`FLB_FILTER_NEST`](../../pipeline/filters/nest.md) | Enable Nest filter | _On_ | +| [`FLB_FILTER_PARSER`](../../pipeline/filters/parser.md) | Enable Parser filter | _On_ | +| [`FLB_FILTER_RECORD_MODIFIER`](../../pipeline/filters/record-modifier.md) | Enable Record Modifier filter | _On_ | +| [`FLB_FILTER_REWRITE_TAG`](../../pipeline/filters/rewrite-tag.md) | Enable Rewrite Tag filter | _On_ | +| [`FLB_FILTER_STDOUT`](../../pipeline/filters/standard-output.md) | Enable Stdout filter | _On_ | +| [`FLB_FILTER_SYSINFO`](../../pipeline/filters/sysinfo.md) | Enable Sysinfo filter | _On_ | +| [`FLB_FILTER_THROTTLE`](../../pipeline/filters/throttle.md) | Enable Throttle filter | _On_ | +| [`FLB_FILTER_TYPE_CONVERTER`](../../pipeline/filters/type-converter.md) | Enable Type Converter filter | _On_ | +| [`FLB_FILTER_WASM`](../../pipeline/filters/wasm.md) | Enable WASM filter | _On_ | + +### Output plugins + +Output plugins let you flush the information to some external interface, service, or +terminal. 
The following table describes the output plugins available: + +| Option | Description | Default | | :--- | :--- | :--- | -| [FLB\_OUT\_AZURE](../../pipeline/outputs/azure.md) | Enable Microsoft Azure output plugin | On | -| [FLB\_OUT\_AZURE\_KUSTO](../../pipeline/outputs/azure_kusto.md) | Enable Azure Kusto output plugin | On | -| [FLB\_OUT\_BIGQUERY](../../pipeline/outputs/bigquery.md) | Enable Google BigQuery output plugin | On | -| [FLB\_OUT\_COUNTER](../../pipeline/outputs/counter.md) | Enable Counter output plugin | On | -| [FLB\_OUT\_CLOUDWATCH\_LOGS](../../pipeline/outputs/cloudwatch.md) | Enable Amazon CloudWatch output plugin | On | -| [FLB\_OUT\_DATADOG](../../pipeline/outputs/datadog.md) | Enable Datadog output plugin | On | -| [FLB\_OUT\_ES](../../pipeline/outputs/elasticsearch.md) | Enable [Elastic Search](http://www.elastic.co) output plugin | On | -| [FLB\_OUT\_FILE](../../pipeline/outputs/file.md) | Enable File output plugin | On | -| [FLB\_OUT\_KINESIS\_FIREHOSE](../../pipeline/outputs/firehose.md) | Enable Amazon Kinesis Data Firehose output plugin | On | -| [FLB\_OUT\_KINESIS\_STREAMS](../../pipeline/outputs/kinesis.md) | Enable Amazon Kinesis Data Streams output plugin | On | -| [FLB\_OUT\_FLOWCOUNTER](../../pipeline/outputs/flowcounter.md) | Enable Flowcounter output plugin | On | -| [FLB\_OUT\_FORWARD](../../pipeline/outputs/forward.md) | Enable [Fluentd](http://www.fluentd.org) output plugin | On | -| [FLB\_OUT\_GELF](../../pipeline/outputs/gelf.md) | Enable Gelf output plugin | On | -| [FLB\_OUT\_HTTP](../../pipeline/outputs/http.md) | Enable HTTP output plugin | On | -| [FLB\_OUT\_INFLUXDB](../../pipeline/outputs/influxdb.md) | Enable InfluxDB output plugin | On | -| [FLB\_OUT\_KAFKA](../../pipeline/outputs/kafka.md) | Enable Kafka output | Off | -| [FLB\_OUT\_KAFKA\_REST](../../pipeline/outputs/kafka-rest-proxy.md) | Enable Kafka REST Proxy output plugin | On | -| FLB\_OUT\_LIB | Enable Lib output plugin | On | -| [FLB\_OUT\_NATS](../../pipeline/outputs/nats.md) | Enable [NATS](http://www.nats.io) output plugin | On | -| FLB\_OUT\_NULL | Enable NULL output plugin | On | -| FLB\_OUT\_PGSQL | Enable PostgreSQL output plugin | On | -| FLB\_OUT\_PLOT | Enable Plot output plugin | On | -| FLB\_OUT\_SLACK | Enable Slack output plugin | On | -| [FLB\_OUT\_S3](../../pipeline/outputs/s3.md) | Enable Amazon S3 output plugin | On | -| [FLB\_OUT\_SPLUNK](../../pipeline/outputs/splunk.md) | Enable Splunk output plugin | On | -| [FLB\_OUT\_STACKDRIVER](../../pipeline/outputs/stackdriver.md) | Enable Google Stackdriver output plugin | On | -| [FLB\_OUT\_STDOUT](build-and-install.md) | Enable STDOUT output plugin | On | -| FLB\_OUT\_TCP | Enable TCP/TLS output plugin | On | -| [FLB\_OUT\_TD](../../pipeline/outputs/treasure-data.md) | Enable [Treasure Data](http://www.treasuredata.com) output plugin | On | - +| [`FLB_OUT_AZURE`](../../pipeline/outputs/azure.md) | Enable Microsoft Azure output plugin | _On_ | +| [`FLB_OUT_AZURE_KUSTO`](../../pipeline/outputs/azure_kusto.md) | Enable Azure Kusto output plugin | _On_ | +| [`FLB_OUT_BIGQUERY`](../../pipeline/outputs/bigquery.md) | Enable Google BigQuery output plugin | _On_ | +| [`FLB_OUT_COUNTER`](../../pipeline/outputs/counter.md) | Enable Counter output plugin | _On_ | +| [`FLB_OUT_CLOUDWATCH_LOGS`](../../pipeline/outputs/cloudwatch.md) | Enable Amazon CloudWatch output plugin | _On_ | +| [`FLB_OUT_DATADOG`](../../pipeline/outputs/datadog.md) | Enable Datadog output plugin | _On_ | +| 
[`FLB_OUT_ES`](../../pipeline/outputs/elasticsearch.md) | Enable [Elastic Search](http://www.elastic.co) output plugin | _On_ | +| [`FLB_OUT_FILE`](../../pipeline/outputs/file.md) | Enable File output plugin | _On_ | +| [`FLB_OUT_KINESIS_FIREHOSE`](../../pipeline/outputs/firehose.md) | Enable Amazon Kinesis Data Firehose output plugin | _On_ | +| [`FLB_OUT_KINESIS_STREAMS`](../../pipeline/outputs/kinesis.md) | Enable Amazon Kinesis Data Streams output plugin | _On_ | +| [`FLB_OUT_FLOWCOUNTER`](../../pipeline/outputs/flowcounter.md) | Enable Flowcounter output plugin | _On_ | +| [`FLB_OUT_FORWARD`](../../pipeline/outputs/forward.md) | Enable [Fluentd](http://www.fluentd.org) output plugin | _On_ | +| [`FLB_OUT_GELF`](../../pipeline/outputs/gelf.md) | Enable Gelf output plugin | _On_ | +| [`FLB_OUT_HTTP`](../../pipeline/outputs/http.md) | Enable HTTP output plugin | _On_ | +| [`FLB_OUT_INFLUXDB`](../../pipeline/outputs/influxdb.md) | Enable InfluxDB output plugin | _On_ | +| [`FLB_OUT_KAFKA`](../../pipeline/outputs/kafka.md) | Enable Kafka output | Off | +| [`FLB_OUT_KAFKA_REST`](../../pipeline/outputs/kafka-rest-proxy.md) | Enable Kafka REST Proxy output plugin | _On_ | +| `FLB_OUT_LIB` | Enable Lib output plugin | _On_ | +| [`FLB_OUT_NATS`](../../pipeline/outputs/nats.md) | Enable [NATS](http://www.nats.io) output plugin | _On_ | +| `FLB_OUT_NULL` | Enable NULL output plugin | _On_ | +| `FLB_OUT_PGSQL` | Enable PostgreSQL output plugin | _On_ | +| `FLB_OUT_PLOT` | Enable Plot output plugin | _On_ | +| `FLB_OUT_SLACK` | Enable Slack output plugin | _On_ | +| [`FLB_OUT_S3`](../../pipeline/outputs/s3.md) | Enable Amazon S3 output plugin | _On_ | +| [`FLB_OUT_SPLUNK`](../../pipeline/outputs/splunk.md) | Enable Splunk output plugin | _On_ | +| [`FLB_OUT_STACKDRIVER`](../../pipeline/outputs/stackdriver.md) | Enable Google Stackdriver output plugin | _On_ | +| [`FLB_OUT_STDOUT`](build-and-install.md) | Enable STDOUT output plugin | _On_ | +| `FLB_OUT_TCP` | Enable TCP/TLS output plugin | _On_ | +| [`FLB_OUT_TD`](../../pipeline/outputs/treasure-data.md) | Enable [Treasure Data](http://www.treasuredata.com) output plugin | _On_ | + +### Processor plugins + +Processor plugins handle the events within the processor pipelines to allow +modifying, enriching, or dropping events. + +The following table describes the processors available: + +| Option | Description | Default || :--- | :--- | :--- | +| [`FLB_PROCESSOR_METRICS_SELECTOR`](../../pipeline/processors/metrics-selector.md) | Enable metrics selector processor | _On_ | +| [`FLB_PROCESSOR_LABELS`](../../pipeline/processors/labels.md) | Enable metrics label manipulation processor | _On_ | diff --git a/installation/sources/build-with-static-configuration.md b/installation/sources/build-with-static-configuration.md index 9e1b9dd7f..31430320a 100644 --- a/installation/sources/build-with-static-configuration.md +++ b/installation/sources/build-with-static-configuration.md @@ -1,20 +1,36 @@ -# Build with Static Configuration +# Build with static configuration -[Fluent Bit](https://fluentbit.io) in normal operation mode allows to be configurable through [text files](https://github.com/fluent/fluent-bit-docs/tree/8ab2f4cda8dfdd8def7fa0cf5c7ffc23069e5a70/installation/configuration/file.md) or using specific arguments in the command line, while this is the ideal deployment case, there are scenarios where a more restricted configuration is required: static configuration mode. 
+[Fluent Bit](https://fluentbit.io) in normal operation mode is configurable through +[text files](/installation/configuration/file.md) +or using specific arguments in the command line. Although this is the ideal deployment +case, there are scenarios where a more restricted configuration is required. Static +configuration mode restricts configuration ability. -Static configuration mode aims to include a built-in configuration in the final binary of Fluent Bit, disabling the usage of external files or flags at runtime. +Static configuration mode includes a built-in configuration in the final binary of +Fluent Bit, disabling the usage of external files or flags at runtime. -## Getting Started +## Get started ### Requirements -The following steps assumes you are familiar with configuring Fluent Bit using text files and you have experience building it from scratch as described in the [Build and Install](https://github.com/fluent/fluent-bit-docs/tree/8ab2f4cda8dfdd8def7fa0cf5c7ffc23069e5a70/installation/sources/build_install.md) section. +The following steps assume you are familiar with configuring Fluent Bit using text +files and you have experience building it from scratch as described in +[Build and Install](build-and-install.md). #### Configuration Directory -In your file system prepare a specific directory that will be used as an entry point for the build system to lookup and parse the configuration files. It is mandatory that this directory contain as a minimum one configuration file called _fluent-bit.conf_ containing the required [SERVICE](https://github.com/fluent/fluent-bit-docs/tree/8ab2f4cda8dfdd8def7fa0cf5c7ffc23069e5a70/installation/configuration/file.md#config_section), [INPUT](https://github.com/fluent/fluent-bit-docs/tree/8ab2f4cda8dfdd8def7fa0cf5c7ffc23069e5a70/installation/sources/configuration/file.md#config_input) and [OUTPUT](https://github.com/fluent/fluent-bit-docs/tree/8ab2f4cda8dfdd8def7fa0cf5c7ffc23069e5a70/installation/configuration/file.md#config_output) sections. As an example create a new _fluent-bit.conf_ file with the following content: +In your file system, prepare a specific directory that will be used as an entry +point for the build system to lookup and parse the configuration files. This +directory must contain a minimum of one configuration file called +`fluent-bit.conf` containing the required +[SERVICE](/administration/configuring-fluent-bit/yaml/service-section.md), +[INPUT](/concepts/data-pipeline/input.md), and [OUTPUT](/concepts/data-pipeline/output.md) +sections. -```python +As an example, create a new `fluent-bit.conf` file with the following +content: + +```python copy [SERVICE] Flush 1 Daemon off @@ -28,31 +44,37 @@ In your file system prepare a specific directory that will be used as an entry p Match * ``` -the configuration provided above will calculate CPU metrics from the running system and print them to the standard output interface. +This configuration calculates CPU metrics from the running system and prints them +to the standard output interface. -#### Build with Custom Configuration +#### Build with custom configuration -Inside Fluent Bit source code, get into the build/ directory and run CMake appending the FLB\_STATIC\_CONF option pointing the configuration directory recently created, e.g: +1. Go to the Fluent Bit source code build directory: -```bash -$ cd fluent-bit/build/ -$ cmake -DFLB_STATIC_CONF=/path/to/my/confdir/ -``` + ```bash copy + cd fluent-bit/build/ + ``` -then build it: +1. 
Run CMake, appending the `FLB_STATIC_CONF` option pointing to + the configuration directory recently created: -```bash -$ make -``` + ```bash copy + cmake -DFLB_STATIC_CONF=/path/to/my/confdir/ + ``` + +1. Build Fluent Bit: -At this point the fluent-bit binary generated is ready to run without necessity of further configuration: + ```bash copy + make + ``` + +The generated `fluent-bit` binary is ready to run without additional configuration: ```bash -$ bin/fluent-bit +$ bin/fluent-bit Fluent-Bit v0.15.0 Copyright (C) Treasure Data [2018/10/19 15:32:31] [ info] [engine] started (pid=15186) [0] cpu.local: [1539984752.000347547, {"cpu_p"=>0.750000, "user_p"=>0.500000, "system_p"=>0.250000, "cpu0.p_cpu"=>1.000000, "cpu0.p_user"=>1.000000, "cpu0.p_system"=>0.000000, "cpu1.p_cpu"=>0.000000, "cpu1.p_user"=>0.000000, "cpu1.p_system"=>0.000000, "cpu2.p_cpu"=>0.000000, "cpu2.p_user"=>0.000000, "cpu2.p_system"=>0.000000, "cpu3.p_cpu"=>1.000000, "cpu3.p_user"=>1.000000, "cpu3.p_system"=>0.000000}] ``` - diff --git a/installation/sources/download-source-code.md b/installation/sources/download-source-code.md index b82d74098..729f3144c 100644 --- a/installation/sources/download-source-code.md +++ b/installation/sources/download-source-code.md @@ -1,23 +1,31 @@ -# Download Source Code +# Download source code + +You can download the most recent stable or development source code. ## Stable -For production systems, we strongly suggest that you always get the latest stable release of the source code in either zip or tarball format from Github using the following link pattern: +For production systems, it's strongly suggested that you get the latest stable release +of the source code in either zip file or tarball file format from GitHub using the +following link pattern: +```text https://github.com/fluent/fluent-bit/archive/refs/tags/v<release version>.tar.gz https://github.com/fluent/fluent-bit/archive/refs/tags/v<release version>.zip +``` -For example for version 1.8.12 the link is the following: [https://github.com/fluent/fluent-bit/archive/refs/tags/v1.8.12.tar.gz](https://github.com/fluent/fluent-bit/archive/refs/tags/v1.8.12.tar.gz) +For example, for version 1.8.12 the link is: [https://github.com/fluent/fluent-bit/archive/refs/tags/v1.8.12.tar.gz](https://github.com/fluent/fluent-bit/archive/refs/tags/v1.8.12.tar.gz) ## Development -For anyone who aims to contribute to the project by testing or extending the code base, you can get the development version from our GIT repository: +If you want to contribute to Fluent Bit, you should use the most recent code. You can +get the development version from the Git repository: ```bash -$ git clone https://github.com/fluent/fluent-bit +git clone https://github.com/fluent/fluent-bit ``` -Note that our _master_ branch is where the development of Fluent Bit happens. -Since it's a development version, expect issues when compiling or at run time. +The `master` branch is where the development of Fluent Bit happens. +Development version users should expect issues when compiling or at run time. -We encourage everybody to help us testing every development version, at the end this is what will become stable. +Fluent Bit users are encouraged to help test every development version to ensure a +stable release. 
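As a quick illustration of the stable-release links described earlier on this page, the following commands download and unpack a tagged source archive. This is a sketch that assumes the v1.8.12 example above; GitHub tag archives normally unpack into a `<project>-<version>` directory, so adjust the version and directory name for the release you choose.

```bash
# Download the v1.8.12 source tarball from GitHub and unpack it.
curl -LO https://github.com/fluent/fluent-bit/archive/refs/tags/v1.8.12.tar.gz
tar -xzf v1.8.12.tar.gz

# The archive typically extracts to fluent-bit-<version>.
cd fluent-bit-1.8.12
```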
diff --git a/installation/supported-platforms.md b/installation/supported-platforms.md index 778edbb6a..908a1e9d4 100644 --- a/installation/supported-platforms.md +++ b/installation/supported-platforms.md @@ -1,33 +1,39 @@ -# Supported Platforms +# Supported platforms -The following operating systems and architectures are supported in Fluent Bit. +Fluent Bit supports the following operating systems and architectures: | Operating System | Distribution | Architectures | | :--- | :--- | :--- | -| Linux | [Amazon Linux 2023](linux/amazon-linux.md) | x86\_64, Arm64v8 | -| | [Amazon Linux 2](linux/amazon-linux.md) | x86\_64, Arm64v8 | -| | [Centos 9 Stream](linux/redhat-centos.md) | x86\_64, Arm64v8 | -| | [Centos 8](linux/redhat-centos.md) | x86\_64, Arm64v8 | -| | [Centos 7](linux/redhat-centos.md) | x86\_64, Arm64v8 | -| | [Rocky Linux 8](linux/redhat-centos.md) | x86\_64, Arm64v8 | -| | [Alma Linux 8](linux/redhat-centos.md) | x86\_64, Arm64v8 | -| | [Debian 12 \(Bookworm\)](linux/debian.md) | x86\_64, Arm64v8 | -| | [Debian 11 \(Bullseye\)](linux/debian.md) | x86\_64, Arm64v8 | -| | [Debian 10 \(Buster\)](linux/debian.md) | x86\_64, Arm64v8 | -| | [Ubuntu 22.04 \(Jammy Jellyfish\)](linux/ubuntu.md) | x86\_64, Arm64v8 | -| | [Ubuntu 20.04 \(Focal Fossa\)](linux/ubuntu.md) | x86\_64, Arm64v8 | -| | [Ubuntu 18.04 \(Bionic Beaver\)](linux/ubuntu.md) | x86\_64, Arm64v8 | -| | [Ubuntu 16.04 \(Xenial Xerus\)](linux/ubuntu.md) | x86\_64 | -| | [Raspbian 11 \(Bullseye\)](linux/raspbian-raspberry-pi.md) | Arm32v7 | -| | [Raspbian 10 \(Buster\)](linux/raspbian-raspberry-pi.md) | Arm32v7 | +| Linux | [Amazon Linux 2023](linux/amazon-linux.md) | x86_64, Arm64v8 | +| | [Amazon Linux 2](linux/amazon-linux.md) | x86_64, Arm64v8 | +| | [CentOS 9 Stream](linux/redhat-centos.md) | x86_64, Arm64v8 | +| | [CentOS 8](linux/redhat-centos.md) | x86_64, Arm64v8 | +| | [CentOS 7](linux/redhat-centos.md) | x86_64, Arm64v8 | +| | [Rocky Linux 8](linux/redhat-centos.md) | x86_64, Arm64v8 | +| | [Alma Linux 8](linux/redhat-centos.md) | x86_64, Arm64v8 | +| | [Debian 12 (Bookworm)](linux/debian.md) | x86_64, Arm64v8 | +| | [Debian 11 (Bullseye)](linux/debian.md) | x86_64, Arm64v8 | +| | [Debian 10 (Buster)](linux/debian.md) | x86_64, Arm64v8 | +| | [Ubuntu 24.04 (Noble Numbat)](linux/ubuntu.md) | x86_64, Arm64v8 | +| | [Ubuntu 22.04 (Jammy Jellyfish)](linux/ubuntu.md) | x86_64, Arm64v8 | +| | [Ubuntu 20.04 (Focal Fossa)](linux/ubuntu.md) | x86_64, Arm64v8 | +| | [Ubuntu 18.04 (Bionic Beaver)](linux/ubuntu.md) | x86_64, Arm64v8 | +| | [Ubuntu 16.04 (Xenial Xerus)](linux/ubuntu.md) | x86_64 | +| | [Raspbian 11 (Bullseye)](linux/raspbian-raspberry-pi.md) | Arm32v7 | +| | [Raspbian 10 (Buster)](linux/raspbian-raspberry-pi.md) | Arm32v7 | | macOS | * | x86_64, Apple M1 | -| Windows | [Windows Server 2019](windows.md) | x86\_64, x86 | -| | [Windows 10 1903](windows.md) | x86\_64, x86 | +| Windows | [Windows Server 2019](windows.md) | x86_64, x86 | +| | [Windows 10 1903](windows.md) | x86_64, x86 | -From an architecture support perspective, Fluent Bit is fully functional on x86\_64, Arm64v8 and Arm32v7 based processors. +From an architecture support perspective, Fluent Bit is fully functional on x86_64, +Arm64v8, and Arm32v7 based processors. -Fluent Bit can work also on OSX and \*BSD systems, but not all plugins will be available on all platforms. -Official support will be expanding based on community demand. 
-Fluent Bit may run on older operating systems though will need to be built from source, or use custom packages from [enterprise providers](https://fluentbit.io/enterprise).
+Fluent Bit can also work on macOS and Berkeley Software Distribution (BSD) systems,
+but not all plugins will be available on all platforms.
-Fluent Bit is supported on Linux on IBM Z (s390x) environment with some restrictions but only container images are provided for these targets officially.
+Official support is based on community demand. Fluent Bit might run on older operating
+systems, but must be built from source or installed using custom packages from
+[enterprise providers](https://fluentbit.io/enterprise).
+
+Fluent Bit is supported for Linux on IBM Z (s390x) environments with some
+restrictions, but only container images are provided for these targets officially.
diff --git a/installation/upgrade-notes.md b/installation/upgrade-notes.md
index 68d21f8cb..6e59be160 100644
--- a/installation/upgrade-notes.md
+++ b/installation/upgrade-notes.md
@@ -1,13 +1,19 @@
 # Upgrade Notes

-The following article cover the relevant notes for users upgrading from previous Fluent Bit versions. We aim to cover compatibility changes that you must be aware of.
+The following article covers the relevant compatibility changes for users upgrading
+from previous Fluent Bit versions.

-For more details about changes on each release please refer to the [Official Release Notes](https://fluentbit.io/announcements/).
+For more details about changes on each release, refer to the
+[Official Release Notes](https://fluentbit.io/announcements/).

-Note: release notes will be prepared in advance of a Git tag for a release so an official release should provide both a tag and a release note together to allow users to verify and understand the release contents.
+Release notes will be prepared in advance of a Git tag for a release. An official
+release should provide both a tag and a release note together to allow users to
+verify and understand the release contents.

-The tag drives the overall binary release process so release binaries (containers/packages) will appear after a tag and its associated release note.
-This allows users to expect the new release binary to appear and allow/deny/update it as appropriate in their infrastructure.
+The tag drives the binary release process. Release binaries (containers and packages)
+will appear after a tag and its associated release note. This lets users expect
+the new release binary to appear and allow/deny/update it as appropriate in their
+infrastructure.

 ## Fluent Bit v1.9.9

@@ -16,42 +22,63 @@ Users should switch to the `fluent-bit` package.

 ## Fluent Bit v1.6

-If you are migrating from previous version of Fluent Bit please review the following important changes:
+If you are migrating from a previous version of Fluent Bit, review the following
+important changes:

 ### Tail Input Plugin

-Now by default the plugin follows a file from the end once the service starts \(old behavior was always read from the beginning\). For every file found at start, its followed from it last position, for new files discovered at runtime or rotated, they are read from the beginning.
+By default, the tail input plugin follows a file from the end after the service starts,
+instead of reading it from the beginning. Every file found when the plugin starts is
+followed from its last position. New files discovered at runtime or when files rotate
+are read from the beginning.
-If you desire to keep the old behavior you can set the option `read_from_head` to true. +To keep the old behavior, set the option `read_from_head` to `true`. ### Stackdriver Output Plugin -The project\_id of [resource](https://cloud.google.com/logging/docs/reference/v2/rest/v2/MonitoredResource) in [LogEntry](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry) sent to Google Cloud Logging would be set to the project ID rather than the project number. To learn the difference between Project ID and project number, see [this](https://cloud.google.com/resource-manager/docs/creating-managing-projects#before_you_begin) for more details. +The `project_id` of +[resource](https://cloud.google.com/logging/docs/reference/v2/rest/v2/MonitoredResource) +in [LogEntry](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry) +sent to Google Cloud Logging would be set to the `project_id` rather than the project +number. To learn the difference between Project ID and project number, see +[Creating and managing projects](https://cloud.google.com/resource-manager/docs/creating-managing-projects#before_you_begin). -If you have any existing queries based on the resource's project\_id, please update your query accordingly. +If you have existing queries based on the resource's `project_id,` update your query accordingly. ## Fluent Bit v1.5 The migration from v1.4 to v1.5 is pretty straightforward. -* If you enabled `keepalive` mode in your configuration, note that this configuration property has been renamed to `net.keepalive`. Now all Network I/O keepalive is enabled by default, to learn more about this and other associated configuration properties read the [Networking Administration](https://docs.fluentbit.io/manual/administration/networking#tcp-keepalive) section. -* If you use the Elasticsearch output plugin, note the default value of `type` [changed from `flb_type` to `_doc`](https://github.com/fluent/fluent-bit/commit/04ed3d8104ca8a2f491453777ae6e38e5377817e#diff-c9ae115d3acaceac5efb949edbb21196). Many versions of Elasticsearch will tolerate this, but ES v5.6 through v6.1 require a type _without_ a leading underscore. See the [Elasticsearch output plugin documentation FAQ entry](https://docs.fluentbit.io/manual/pipeline/outputs/elasticsearch#faq-underscore) for more. +- The `keepalive` configuration mode has been renamed to `net.keepalive`. Now, + all Network I/O keepalive is enabled by default. To learn more about this and other + associated configuration properties read the + [Networking Administration](https://docs.fluentbit.io/manual/administration/networking#tcp-keepalive) + section. +- If you use the Elasticsearch output plugin, the default value of `type` + [changed from `flb_type` to `_doc`](https://github.com/fluent/fluent-bit/commit/04ed3d8104ca8a2f491453777ae6e38e5377817e#diff-c9ae115d3acaceac5efb949edbb21196). + Many versions of Elasticsearch tolerate this, but Elasticsearch v5.6 through v6.1 + require a `type` without a leading underscore. See the + [Elasticsearch output plugin documentation FAQ entry](https://docs.fluentbit.io/manual/pipeline/outputs/elasticsearch#faq-underscore) for more. ## Fluent Bit v1.4 -If you are migrating from Fluent Bit v1.3, there are no breaking changes. Just new exciting features to enjoy :\) +If you are migrating from Fluent Bit v1.3, there are no breaking changes. ## Fluent Bit v1.3 -If you are migrating from Fluent Bit v1.2 to v1.3, there are no breaking changes. 
If you are upgrading from an older version please review the incremental changes below. +If you are migrating from Fluent Bit v1.2 to v1.3, there are no breaking changes. +If you are upgrading from an older version, review the following incremental changes: ## Fluent Bit v1.2 ### Docker, JSON, Parsers and Decoders -On Fluent Bit v1.2 we have fixed many issues associated with JSON encoding and decoding, for hence when parsing Docker logs **is no longer necessary** to use decoders. The new Docker parser looks like this: +Fluent Bit v1.2 fixed many issues associated with JSON encoding and decoding. -```text +For example, when parsing Docker logs, it's no longer necessary to use decoders. The +new Docker parser looks like this: + +```python [PARSER] Name docker Format json @@ -60,15 +87,17 @@ On Fluent Bit v1.2 we have fixed many issues associated with JSON encoding and d Time_Keep On ``` -> Note: again, do not use decoders. - ### Kubernetes Filter -We have done improvements also on how Kubernetes Filter handle the stringified _log_ message. If the option _Merge\_Log_ is enabled, it will try to handle the log content as a JSON map, if so, it will add the keys to the root map. +Fluent Bit made improvements to Kubernetes Filter handling of stringified `log` +messages. If the `Merge_Log` option is enabled, it will try to handle the log content +as a JSON map. If so, it will add the keys to the root map. -In addition, we have fixed and improved the option called _Merge\_Log\_Key_. If a merge log succeed, all new keys will be packaged under the key specified by this option, a suggested configuration is as follows: +In addition, fixes and improvements were made to the `Merge_Log_Key` option. If a +merge log succeeds, all new keys will be packaged under the key specified by this +option. A suggested configuration is as follows: -```text +```python [FILTER] Name Kubernetes Match kube.* @@ -97,15 +126,20 @@ the final record will be composed as follows: ## Fluent Bit v1.1 -If you are upgrading from **Fluent Bit <= 1.0.x** you should take in consideration the following relevant changes when switching to **Fluent Bit v1.1** series: +If you are upgrading from Fluent Bit 1.0.x or earlier, review the following relevant +changes when switching to Fluent Bit v1.1 or later series: -### Kubernetes Filter +### Kubernetes filter -We introduced a new configuration property called _Kube\_Tag\_Prefix_ to help Tag prefix resolution and address an unexpected behavior that landed in previous versions. +Fluent Bit introduced a new configuration property called `Kube_Tag_Prefix` to help +with Tag prefix resolution and address an unexpected behavior in previous versions. -During 1.0.x release cycle, a commit in Tail input plugin changed the default behavior on how the Tag was composed when using the wildcard for expansion generating breaking compatibility with other services. Consider the following configuration example: +During the `1.0.x` release cycle, a commit in the Tail input plugin changed the +default behavior of how the Tag was composed when using the wildcard for expansion, +breaking compatibility with other services.
Consider the following +configuration example: -```text +```python [INPUT] Name tail Path /var/log/containers/*.log @@ -118,19 +152,26 @@ The expected behavior is that Tag will be expanded to: kube.var.log.containers.apache.log ``` -but the change introduced in 1.0 series switched from absolute path to the base file name only: +The change introduced in the 1.0 series switched from absolute path to the base +filename only: ```text kube.apache.log ``` -On Fluent Bit v1.1 release we restored to our default behavior and now the Tag is composed using the absolute path of the monitored file. +The Fluent Bit v1.1 release restored the default behavior, and now the Tag is +composed using the absolute path of the monitored file. -> Having absolute path in the Tag is relevant for routing and flexible configuration where it also helps to keep compatibility with Fluentd behavior. +Having absolute path in the Tag is relevant for routing and flexible configuration +where it also helps to keep compatibility with Fluentd behavior. -This behavior switch in Tail input plugin affects how Filter Kubernetes operates. As you know when the filter is used it needs to perform local metadata lookup that comes from the file names when using Tail as a source. Now with the new _Kube\_Tag\_Prefix_ option you can specify what's the prefix used in Tail input plugin, for the configuration example above the new configuration will look as follows: +This behavior switch in Tail input plugin affects how Filter Kubernetes operates. +When the filter is used, it needs to perform local metadata lookup that comes from the +file names when using Tail as a source. With the new `Kube_Tag_Prefix` option +you can specify the prefix used in the Tail input plugin. For the previous configuration +example, the new configuration looks like: -```text +```python [INPUT] Name tail Path /var/log/containers/*.log @@ -142,4 +183,5 @@ This behavior switch in Tail input plugin affects how Filter Kubernetes operates Kube_Tag_Prefix kube.var.log.containers. ``` -So the proper for _Kube\_Tag\_Prefix_ value must be composed by Tag prefix set in Tail input plugin plus the converted monitored directory replacing slashes with dots. +The proper value for `Kube_Tag_Prefix` is the Tag prefix set in the Tail +input plugin plus the monitored directory converted by replacing slashes with dots. diff --git a/installation/windows.md b/installation/windows.md index c20604445..60f66a396 100644 --- a/installation/windows.md +++ b/installation/windows.md @@ -1,12 +1,18 @@ # Windows -Fluent Bit is distributed as **fluent-bit** package for Windows and as a [Windows container on Docker Hub](docker.md). Fluent Bit has two flavours of Windows installers: a ZIP archive (for quick testing) and an EXE installer (for system installation). +Fluent Bit is distributed as the `fluent-bit` package for Windows and as a +[Windows container on Docker Hub](docker.md). Fluent Bit provides two Windows +installers: a `ZIP` archive and an `EXE` installer. -Not all plugins are supported on Windows: the [CMake configuration](https://github.com/fluent/fluent-bit/blob/master/cmake/windows-setup.cmake) shows the default set of supported plugins. +Not all plugins are supported on Windows. The +[CMake configuration](https://github.com/fluent/fluent-bit/blob/master/cmake/windows-setup.cmake) +shows the default set of supported plugins.
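If you need to confirm exactly which plugins your specific Windows build includes, one quick check (a suggestion, not part of the original page) is to ask the binary itself; its help output lists the compiled-in input, filter, and output plugins:

```text
PS> .\bin\fluent-bit.exe --help
```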
## Configuration -Make sure to provide a valid Windows configuration with the installation, a sample one is shown below: +Provide a valid Windows configuration with the installation. + +The following configuration is an example: ```python [SERVICE] @@ -75,74 +81,78 @@ Make sure to provide a valid Windows configuration with the installation, a samp ## Migration to Fluent Bit -From version 1.9, `td-agent-bit` is a deprecated package and was removed after 1.9.9. The correct package name to use now is `fluent-bit`. +For version 1.9 and later, `td-agent-bit` is a deprecated package and was removed +after 1.9.9. The correct package name to use now is `fluent-bit`. -## Installation Packages +## Installation packages -The latest stable version is 2.2.2. -Each version is available via the following download URLs. +The latest stable version is 4.0.0. +Each version is available from the following download URLs. -| INSTALLERS | SHA256 CHECKSUMS | -| ------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------- | -| [fluent-bit-2.2.2-win32.exe](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win32.exe) | [d12c689326deae3348f73772cab464990ed16572cfdc1874193e1556ade76752](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win32.exe.sha256) | -| [fluent-bit-2.2.2-win32.zip](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win32.zip) | [73c4cbbb2c6cda3437ce43990105375fb139dccad71a7093cf0b830f0e742bba](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win32.zip.sha256) | -| [fluent-bit-2.2.2-win64.exe](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win64.exe) | [03864572aeae8ed44cf05e7f93592d9c0c13220c3c2e2b6eb17861782ffbfbe8](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win64.exe.sha256) | -| [fluent-bit-2.2.2-win64.zip](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win64.zip) | [89a95ddc98b51a80d257fb9f32fee36507a1f4cde8d9a9dafd8dee10d3be1988](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win64.zip.sha256) | -| [fluent-bit-2.2.2-winarm64.exe](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-winarm64.exe) | [8dd1e48c1ea4e7b2549ad3a82bc2f0e7ac9e5abc4b5efabf9dc2e1208416e7a0](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-winarm64.exe.sha256) | -| [fluent-bit-2.2.2-winarm64.zip](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-winarm64.zip) | [1fb418953eb081992e6432cfe1c167607dafae370c7a8ab2cf69d6a8c769bc9b](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-winarm64.zip.sha256) | +| INSTALLERS | SHA256 CHECKSUMS | +|----------- | ---------------- | +| [fluent-bit-4.0.0-win32.exe](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win32.exe) | [2676f127b2b71d44f494027fbac4a20bc8be2257fe8a201b28b9780056bde24f](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win32.exe.sha256) | +| [fluent-bit-4.0.0-win32.zip](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win32.zip) | [cb32a9e27134a9861d427a61be4ffd056abc3f74fa1d185e269037538dda9faa](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win32.zip.sha256) | +| [fluent-bit-4.0.0-win64.exe](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win64.exe) | [c4173fe51f81dc3108d6036687d8d0b715f619ffcfb04223ab1c31ef4284ff92](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win64.exe.sha256) | +| [fluent-bit-4.0.0-win64.zip](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win64.zip) | 
[cc8ac4c5b9d2953ffb3ff39b72f9d18aaf0ede2615317d9437c5d1f53b9321be](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win64.zip.sha256) | +| [fluent-bit-4.0.0-winarm64.exe](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-winarm64.exe) | [6bddf88feaf298f79517566013537ed3f7003f72509db9c5a7719b8dd022515b](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-winarm64.exe.sha256) | +| [fluent-bit-4.0.0-winarm64.zip](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-winarm64.zip) | [51179cb8e9b4dd4ea1d930e8913f03f42688c99c6b8ddbf8da9b1a67938a1d5f](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-winarm64.zip.sha256) | -**Note these are now using the Github Actions built versions, the legacy AppVeyor builds are still available (AMD 32/64 only) at releases.fluentbit.io but are deprecated.** +These are now using the Github Actions built versions. Legacy AppVeyor builds are +still available (AMD 32/64 only) at releases.fluentbit.io but are deprecated. MSI installers are also available: -- [fluent-bit-2.2.2-win32.msi](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win32.msi) -- [fluent-bit-2.2.2-win64.msi](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-win64.msi) -- [fluent-bit-2.2.2-winarm64.msi](https://packages.fluentbit.io/windows/fluent-bit-2.2.2-winarm64.msi) +- [fluent-bit-4.0.0-win32.msi](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win32.msi) +- [fluent-bit-4.0.0-win64.msi](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-win64.msi) +- [fluent-bit-4.0.0-winarm64.msi](https://packages.fluentbit.io/windows/fluent-bit-4.0.0-winarm64.msi) -To check the integrity, use `Get-FileHash` cmdlet on PowerShell. +To check the integrity, use the `Get-FileHash` cmdlet for PowerShell. -```powershell -PS> Get-FileHash fluent-bit-2.2.2-win32.exe +```text copy +PS> Get-FileHash fluent-bit-4.0.0-win32.exe ``` -## Installing from ZIP archive +## Installing from a ZIP archive -Download a ZIP archive from above. There are installers for 32-bit and 64-bit environments, so choose one suitable for your environment. +1. Download a ZIP archive. Choose the suitable installers for your 32-bit or 64-bit + environments. -Then you need to expand the ZIP archive. You can do this by clicking "Extract All" on Explorer, or if you're using PowerShell, you can use `Expand-Archive` cmdlet. +1. Expand the ZIP archive. You can do this by clicking **Extract All** in Explorer + or `Expand-Archive` in PowerShell. -```powershell -PS> Expand-Archive fluent-bit-2.2.2-win64.zip -``` + ```text + PS> Expand-Archive fluent-bit-4.0.0-win64.zip + ``` -The ZIP package contains the following set of files. + The ZIP package contains the following set of files. -``` -fluent-bit -├── bin -│ ├── fluent-bit.dll -│ └── fluent-bit.exe -│ └── fluent-bit.pdb -├── conf -│ ├── fluent-bit.conf -│ ├── parsers.conf -│ └── plugins.conf -└── include - │ ├── flb_api.h - │ ├── ... - │ └── flb_worker.h - └── fluent-bit.h -``` + ```text + fluent-bit + ├── bin + │ ├── fluent-bit.dll + │ └── fluent-bit.exe + │ └── fluent-bit.pdb + ├── conf + │ ├── fluent-bit.conf + │ ├── parsers.conf + │ └── plugins.conf + └── include + │ ├── flb_api.h + │ ├── ... + │ └── flb_worker.h + └── fluent-bit.h + ``` -Now, launch cmd.exe or PowerShell on your machine, and execute `fluent-bit.exe` as follows. +1. 
Launch `cmd.exe` or PowerShell on your machine, and execute `fluent-bit.exe`: -```powershell -PS> .\bin\fluent-bit.exe -i dummy -o stdout -``` + ```text + PS> .\bin\fluent-bit.exe -i dummy -o stdout + ``` -If you see the following output, it's working fine! +The following output indicates Fluent Bit is running: -```powershell +```text PS> .\bin\fluent-bit.exe -i dummy -o stdout Fluent Bit v2.0.x * Copyright (C) 2019-2020 The Fluent Bit Authors @@ -161,41 +171,49 @@ Fluent Bit v2.0.x [3] dummy.0: [1561684388.441405800, {"message"=>"dummy"}] ``` -To halt the process, press CTRL-C in the terminal. +To halt the process, press `Control+C` in the terminal. -## Installing from EXE installer +## Installing from the EXE installer -Download an EXE installer from the [download page](https://fluentbit.io/download/). It has both 32-bit and 64-bit builds. Choose one which is suitable for you. +1. Download an EXE installer for the appropriate 32-bit or 64-bit build. +1. Double-click the EXE installer you've downloaded. The installation wizard starts. -Double-click the EXE installer you've downloaded. The installation wizard will automatically start. +  (1).png>) - (1).png>) +1. Click **Next** and finish the installation. By default, Fluent Bit is installed + in `C:\Program Files\fluent-bit\`. -Click Next and proceed. By default, Fluent Bit is installed into `C:\Program Files\fluent-bit\`, so you should be able to launch fluent-bit as follows after installation. +You should be able to launch Fluent Bit using the following PowerShell command:. -```powershell +```text PS> C:\Program Files\fluent-bit\bin\fluent-bit.exe -i dummy -o stdout ``` ### Installer options -The Windows installer is built by \[`CPack` using NSIS([https://cmake.org/cmake/help/latest/cpack\_gen/nsis.html](https://cmake.org/cmake/help/latest/cpack\_gen/nsis.html)) and so supports the [default options](https://nsis.sourceforge.io/Docs/Chapter3.html#3.2.1) that all NSIS installers do for silent installation and the directory to install to. +The Windows installer is built by +[`CPack` using NSIS](https://cmake.org/cmake/help/latest/cpack_gen/nsis.html) +and supports the [default NSIS options](https://nsis.sourceforge.io/Docs/Chapter3.html#3.2.1) +for silent installation and install directory. To silently install to `C:\fluent-bit` directory here is an example: -```powershell +```text PS> <installer exe> /S /D=C:\fluent-bit ``` -The uninstaller automatically provided also supports a silent un-install using the same `/S` flag. This may be useful for provisioning with automation like Ansible, Puppet, etc. +The uninstaller also supports a silent uninstall using the same `/S` flag. +This can be used for provisioning with automation like Ansible, Puppet, and so on. -## Windows Service Support +## Windows service support -Windows services are equivalent to "daemons" in UNIX (i.e. long-running background processes). Since v1.5.0, Fluent Bit has the native support for Windows Service. +Windows services are equivalent to daemons in UNIX (long-running background +processes). +For v1.5.0 and later, Fluent Bit has native support for Windows services. -Suppose you have the following installation layout: +For example, you have the following installation layout: -``` +```text C:\fluent-bit\ ├── conf │ ├── fluent-bit.conf @@ -207,15 +225,16 @@ C:\fluent-bit\ └── fluent-bit.pdb ``` -To register Fluent Bit as a Windows service, you need to execute the following command on Command Prompt. Please be careful that a single space is required after `binpath=`. 
+To register Fluent Bit as a Windows service, execute the following command +at a command prompt. A single space is required after `binpath=`. -```powershell -% sc.exe create fluent-bit binpath= "\fluent-bit\bin\fluent-bit.exe -c \fluent-bit\conf\fluent-bit.conf" +```text +sc.exe create fluent-bit binpath= "\fluent-bit\bin\fluent-bit.exe -c \fluent-bit\conf\fluent-bit.conf" ``` -Now Fluent Bit can be started and managed as a normal Windows service. +Fluent Bit can be started and managed as a normal Windows service. -```powershell +```text % sc.exe start fluent-bit % sc.exe query fluent-bit SERVICE_NAME: fluent-bit @@ -224,45 +243,47 @@ SERVICE_NAME: fluent-bit ... -To halt the Fluent Bit service, just execute the "stop" command. +To halt the Fluent Bit service, use the `stop` command. -```powershell -% sc.exe stop fluent-bit +```text +sc.exe stop fluent-bit ``` To start Fluent Bit automatically on boot, execute the following: -``` -% sc.exe config fluent-bit start= auto +```text +sc.exe config fluent-bit start= auto ``` -### \[FAQ] Fluent Bit fails to start up when installed under `C:\Program Files` +## FAQs -Quotations are required if file paths contain spaces. Here is an example: +### Fluent Bit fails to start up when installed under `C:\Program Files` -``` -% sc.exe create fluent-bit binpath= "\"C:\Program Files\fluent-bit\bin\fluent-bit.exe\" -c \"C:\Program Files\fluent-bit\conf\fluent-bit.conf\"" +Quotations are required if file paths contain spaces. For example: + +```text +sc.exe create fluent-bit binpath= "\"C:\Program Files\fluent-bit\bin\fluent-bit.exe\" -c \"C:\Program Files\fluent-bit\conf\fluent-bit.conf\"" ``` -### \[FAQ] How can I manage Fluent Bit service via PowerShell? +### Can you manage Fluent Bit service using PowerShell? Instead of `sc.exe`, PowerShell can be used to manage Windows services. Create a Fluent Bit service: -```powershell +```text PS> New-Service fluent-bit -BinaryPathName "C:\fluent-bit\bin\fluent-bit.exe -c C:\fluent-bit\conf\fluent-bit.conf" -StartupType Automatic ``` Start the service: -```powershell +```text PS> Start-Service fluent-bit ``` Query the service status: -```powershell +```text PS> get-Service fluent-bit | format-list Name : fluent-bit DisplayName : fluent-bit @@ -277,82 +298,88 @@ ServiceType : Win32OwnProcess Stop the service: -```powershell +```text PS> Stop-Service fluent-bit ``` Remove the service (requires PowerShell 6.0 or later) -```powershell +```text PS> Remove-Service fluent-bit ``` ## Compile from Source -If you need to create a custom executable, you can use the following procedure to compile Fluent Bit by yourself. +If you need to create a custom executable, use the following procedure to +compile Fluent Bit by yourself. ### Preparation -First, you need Microsoft Visual C++ to compile Fluent Bit. You can install the minimum toolkit by the following command: +1. Install Microsoft Visual C++ to compile Fluent Bit. You can install the minimum + toolkit using the following command: -```powershell +```text PS> wget -o vs.exe https://aka.ms/vs/16/release/vs_buildtools.exe PS> start vs.exe ``` -When asked which packages to install, choose "C++ Build Tools" (make sure that "C++ CMake tools for Windows" is selected too) and wait until the process finishes. +1. Choose `C++ Build Tools` and `C++ CMake tools for Windows` and wait until the process finishes. -Also you need to install flex and bison.
One way to install them on Windows is to use + [winflexbison](https://github.com/lexxmark/winflexbison). -```powershell -PS> wget -o winflexbison.zip https://github.com/lexxmark/winflexbison/releases/download/v2.5.22/win_flex_bison-2.5.22.zip -PS> Expand-Archive winflexbison.zip -Destination C:\WinFlexBison -PS> cp -Path C:\WinFlexBison\win_bison.exe C:\WinFlexBison\bison.exe -PS> cp -Path C:\WinFlexBison\win_flex.exe C:\WinFlexBison\flex.exe -``` + ```text + PS> wget -o winflexbison.zip https://github.com/lexxmark/winflexbison/releases/download/v2.5.22/win_flex_bison-2.5.22.zip + PS> Expand-Archive winflexbison.zip -Destination C:\WinFlexBison + PS> cp -Path C:\WinFlexBison\win_bison.exe C:\WinFlexBison\bison.exe + PS> cp -Path C:\WinFlexBison\win_flex.exe C:\WinFlexBison\flex.exe + ``` -Add the path `C:\WinFlexBison` to your systems environment variable "Path". [Here's how to do that](https://www.architectryan.com/2018/03/17/add-to-the-path-on-windows-10/). +1. Add the path `C:\WinFlexBison` to your systems environment variable `Path`. + [Here's how to do that](https://www.architectryan.com/2018/03/17/add-to-the-path-on-windows-10/). -It is important to have installed OpenSSL binaries, at least the library files and headers. +1. Install OpenSSL binaries, at least the library files and headers. -Also you need to install [git](https://git-scm.com/download/win) to pull the source code from the repository. +1. Install [Git](https://git-scm.com/download/win) to pull the source code from the repository. -```powershell -PS> wget -o git.exe https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe -PS> start git.exe -``` + ```text + PS> wget -o git.exe https://github.com/git-for-windows/git/releases/download/v2.28.0.windows.1/Git-2.28.0-64-bit.exe + PS> start git.exe + ``` ### Compilation -Open the start menu on Windows and type "Command Prompt for VS". From the result list select the one that corresponds to your target system ( x86 or x64). - -> **Note:** Check that the installed OpenSSL library files match the selected target. You can check the library files by using the **dumpbin** command with the **/headers** option . +1. Open the **Start menu** on Windows and type `command Prompt for VS`. From the result + list, select the one that corresponds to your target system ( `x86` or `x64`). +1. Verify the installed OpenSSL library files match the selected target. You can + examine the library files by using the `dumpbin` command with the `/headers` + option . -Clone the source code of Fluent Bit. +1. Clone the source code of Fluent Bit. -```powershell -% git clone https://github.com/fluent/fluent-bit -% cd fluent-bit/build -``` + ```text + % git clone https://github.com/fluent/fluent-bit + % cd fluent-bit/build + ``` -Compile the source code. +1. Compile the source code. -```powershell -% cmake .. -G "NMake Makefiles" -% cmake --build . -``` + ```text + % cmake .. -G "NMake Makefiles" + % cmake --build . 
+ ``` Now you should be able to run Fluent Bit: -```powershell -% .\bin\debug\fluent-bit.exe -i dummy -o stdout +```text +.\bin\debug\fluent-bit.exe -i dummy -o stdout ``` ### Packaging To create a ZIP package, call `cpack` as follows: -```powershell -% cpack -G ZIP +```text +cpack -G ZIP ``` diff --git a/installation/yocto-embedded-linux.md b/installation/yocto-embedded-linux.md index ccb135e72..9de20762c 100644 --- a/installation/yocto-embedded-linux.md +++ b/installation/yocto-embedded-linux.md @@ -1,16 +1,21 @@ -# Yocto / Embedded Linux +# Yocto embedded Linux -[Fluent Bit](https://fluentbit.io) source code provides Bitbake recipes to configure, build and package the software for a Yocto based image. Note that specific steps of usage of these recipes in your Yocto environment \(Poky\) is out of the scope of this documentation. +[Fluent Bit](https://fluentbit.io) source code provides BitBake recipes to configure, +build, and package the software for a Yocto-based image. Specific steps in the +usage of these recipes in your Yocto environment (Poky) are out of the scope of this +documentation. -We distribute two main recipes, one for testing/dev purposes and other with the latest stable release. +Fluent Bit distributes two main recipes, one for testing/dev purposes and +one with the latest stable release. | Version | Recipe | Description | | :--- | :--- | :--- | -| devel | [fluent-bit\_git.bb](https://github.com/fluent/fluent-bit/blob/master/fluent-bit_git.bb) | Build Fluent Bit from GIT master. This recipe aims to be used for development and testing purposes only. | -| v1.8.11 | [fluent-bit\_1.8.11.bb](https://github.com/fluent/fluent-bit/blob/v1.8.11/fluent-bit_1.8.11.bb) | Build latest stable version of Fluent Bit. | +| `devel` | [fluent-bit\_git.bb](https://github.com/fluent/fluent-bit/blob/master/fluent-bit_git.bb) | Build Fluent Bit from Git master. Use for development and testing purposes only. | +| `v1.8.11` | [fluent-bit\_1.8.11.bb](https://github.com/fluent/fluent-bit/blob/v1.8.11/fluent-bit_1.8.11.bb) | Build latest stable version of Fluent Bit. | -It's strongly recommended to always use the stable release of Fluent Bit recipe and not the one from GIT master for production deployments. +It's strongly recommended to always use the stable release of the Fluent Bit recipe +and not the one from Git master for production deployments. ## Fluent Bit and other architectures -Fluent Bit >= v1.1.x fully supports x86\_64, x86, arm32v7 and arm64v8. +Fluent Bit >= v1.1.x fully supports `x86_64`, `x86`, `arm32v7`, and `arm64v8`. diff --git a/local-testing/validating-your-data-and-structure.md b/local-testing/validating-your-data-and-structure.md index 81e3d2e63..aa72c6af7 100644 --- a/local-testing/validating-your-data-and-structure.md +++ b/local-testing/validating-your-data-and-structure.md @@ -1,47 +1,80 @@ -# Validating your Data and Structure +# Validating your data and structure -Fluent Bit is a powerful log processing tool that can deal with different sources and formats, in addition it provides several filters that can be used to perform custom modifications. This flexibility is really good but while your pipeline grows, it's strongly recommended to validate your data and structure. +Fluent Bit is a powerful log processing tool that supports multiple sources and +formats. In addition, it provides filters that can be used to perform custom +modifications. As your pipeline grows, it's important to validate your data and +structure.
-> We encourage Fluent Bit users to integrate data validation in their CI systems +Fluent Bit users are encouraged to integrate data validation in their continuous +integration (CI) systems. -A simplified view of our data processing pipeline is as follows: +In a normal production environment, inputs, filters, and outputs are defined in the +configuration. Fluent Bit provides the [Expect](../pipeline/filters/expect.md) filter, +which can be used to validate `keys` and `values` from your records and take action +when an exception is found. - +A simplified view of the data processing pipeline is as follows: -In a normal production environment, many Inputs, Filters, and Outputs are defined in the configuration, so integrating a continuous validation of your configuration against expected results is a must. For this requirement, Fluent Bit provides a specific Filter called **Expect** which can be used to validate expected Keys and Values from your records and takes some action when an exception is found. +```mermaid +flowchart LR +IS[Inputs / Sources] +Fil[Filters] +OD[Outputs / Destination] +IS --> Fil --> OD +``` -## How it Works +## Understand structure and configuration -As an example, consider the following pipeline where your source of data is a normal file with JSON content on it and then two filters: [grep](../pipeline/filters/grep.md) to exclude certain records and [record\_modifier](../pipeline/filters/record-modifier.md) to alter the record content adding and removing specific keys. +Consider the following pipeline, where your source of data is a file with JSON +content and two filters: - +- [grep](../pipeline/filters/grep.md) to exclude certain records +- [record_modifier](../pipeline/filters/record-modifier.md) to alter the record + content by adding and removing specific keys. -Ideally you want to add checkpoints of validation of your data between each step so you can know if your data structure is correct, we do this by using **expect** filter. +```mermaid +flowchart LR +tail["tail (input)"] +grep["grep (filter)"] +record["record_modifier (filter)"] +stdout["stdout (output)"] - +tail --> grep +grep --> record +record --> stdout +``` -Expect filter sets rules that aims to validate certain criteria like: +Add data validation between each step to ensure your data structure is correct. + +This example uses the `expect` filter. + +```mermaid +flowchart LR +tail["tail (input)"] +grep["grep (filter)"] +record["record_modifier (filter)"] +stdout["stdout (output)"] +E1["expect (filter)"] +E2["expect (filter)"] +E3["expect (filter)"] +tail --> E1 --> grep +grep --> E2 --> record --> E3 --> stdout +``` -* does the record contain a key A ? -* does the record not contains key A? -* does the record key A value equals NULL ? -* does the record key A value a different value than NULL ? -* does the record key A value equals B ? +`Expect` filters set rules aiming to validate criteria like: -Every expect filter configuration can expose specific rules to validate the content of your records, it supports the following configuration properties: +- Does the record contain a key `A`? +- Does the record not contain key `A`? +- Does the record key `A` value equal `NULL`? +- Is the record key `A` value not `NULL`? +- Does the record key `A` value equal `B`? -| Property | Description | -| :--- | :--- | -| key\_exists | Check if a key with a given name exists in the record. | -| key\_not\_exists | Check if a key does not exist in the record. | -| key\_val\_is\_null | check that the value of the key is NULL.
| -| key\_val\_is\_not\_null | check that the value of the key is NOT NULL. | -| key\_val\_eq | check that the value of the key equals the given value in the configuration. | -| action | action to take when a rule does not match. The available options are `warn` or `exit`. On `warn`, a warning message is sent to the logging layer when a mismatch of the rules above is found; using `exit` makes Fluent Bit abort with status code `255`. | +Every `expect` filter configuration exposes rules to validate the content of your +records using [configuration properties](../pipeline/filters/expect.md#configuration-parameters). -## Start Testing +## Test the configuration -Consider the following JSON file called `data.log` with the following content: +Consider a JSON file `data.log` with the following content: ```javascript {"color": "blue", "label": {"name": null}} @@ -49,7 +82,9 @@ Consider the following JSON file called `data.log` with the following content: {"color": "green", "label": {"name": "abc"}, "meta": null} ``` -The following Fluent Bit configuration file will configure a pipeline to consume the log above apply an expect filter to validate that keys `color` and `label` exists: +The following Fluent Bit configuration file configures a pipeline to consume the +log, while applying an `expect` filter to validate that the keys `color` and `label` +exist: ```python [SERVICE] @@ -76,9 +111,12 @@ The following Fluent Bit configuration file will configure a pipeline to consume match * ``` -note that if for some reason the JSON parser failed or is missing in the `tail` input \(line 9\), the `expect` filter will trigger the `exit` action. As a test, go ahead and comment out or remove line 9. +If the JSON parser fails or is missing in the `tail` input +(`parser json`), the `expect` filter triggers the `exit` action. -As a second step, we will extend our pipeline and we will add a grep filter to match records that map `label` contains a key called `name` with value `abc`, then an expect filter to re-validate that condition: +To extend the pipeline, add a grep filter to match records whose `label` map +contains a key called `name` with the value `abc`, and add an `expect` filter +to re-validate that condition: ```python [SERVICE] @@ -131,7 +169,8 @@ As a second step, we will extend our pipeline and we will add a grep filter to m match * ``` -## Deploying in Production - -When deploying your configuration in production, you might want to remove the expect filters from your configuration since it's an unnecessary _extra work_ unless you want to have a 100% coverage of checks at runtime. +## Production deployment +When deploying in production, consider removing the `expect` filters from your +configuration. These filters are unnecessary unless you need 100% coverage of +checks at runtime. diff --git a/pipeline/filters/aws-metadata.md b/pipeline/filters/aws-metadata.md index b7b24e5f7..0172d5af0 100644 --- a/pipeline/filters/aws-metadata.md +++ b/pipeline/filters/aws-metadata.md @@ -20,6 +20,7 @@ The plugin supports the following configuration parameters: | tags\_enabled | Specifies if should attach EC2 instance tags. EC2 instance must have the [instance-metadata-tags](https://awscli.amazonaws.com/v2/documentation/api/latest/reference/ec2/modify-instance-metadata-options.html) option enabled (which is disabled by default). | false | | tags\_include | Defines list of specific EC2 tag keys to inject into the logs. Tag keys must be separated by "," character.
Tags which are not present in this list will be ignored. Example: `Name,tag1,tag2`. | | | tags\_exclude | Defines list of specific EC2 tag keys not to inject into the logs. Tag keys must be separated by "," character. Tags which are not present in this list will be injected into the logs. If both `tags_include` and `tags_exclude` are specified, configuration is invalid and plugin fails. Example: `Name,tag1,tag2` | | +| retry\_interval\_s |Defines minimum duration between retries for fetching EC2 instance tags. | 300 | Note: _If you run Fluent Bit in a container, you may have to use instance metadata v1._ The plugin behaves the same regardless of which version is used. diff --git a/pipeline/filters/ecs-metadata.md b/pipeline/filters/ecs-metadata.md index e2d6f567b..a674e4290 100644 --- a/pipeline/filters/ecs-metadata.md +++ b/pipeline/filters/ecs-metadata.md @@ -20,7 +20,7 @@ The following template variables can be used for values with the `Add` option. S | Variable | Description | Supported with Cluster\_Metadata\_Only On | | :--- | :--- | :--- | | `$ClusterName` | The ECS cluster name. Fluent Bit is running on EC2 instance(s) that are part of this cluster. | Yes | -| `$ContainerInstanceARN` | The full ARN of the ECS EC2 Container Instance. This is the instance that Fluent Bit is running on. | Yes | +| `$ContainerInstanceArn` | The full ARN of the ECS EC2 Container Instance. This is the instance that Fluent Bit is running on. | Yes | | `$ContainerInstanceID` | The ID of the ECS EC2 Container Instance. | Yes | | `$ECSAgentVersion` | The Version string of the ECS Agent that is running on the container instance. | Yes | | `$ECSContainerName` | The name of the container from which the log originated. This is the name in your ECS Task Definition. | No | diff --git a/pipeline/filters/grep.md b/pipeline/filters/grep.md index c3aeb609d..019b007c5 100644 --- a/pipeline/filters/grep.md +++ b/pipeline/filters/grep.md @@ -1,28 +1,33 @@ --- -description: Select or exclude records per patterns +description: Select or exclude records using patterns --- # Grep -The _Grep Filter_ plugin allows you to match or exclude specific records based on regular expression patterns for values or nested values. +The _Grep Filter_ plugin lets you match or exclude specific records based on +regular expression patterns for values or nested values. -## Configuration Parameters +## Configuration parameters The plugin supports the following configuration parameters: -| Key | Value Format | Description | -| :--- | :--- | :--- | -| Regex | KEY REGEX | Keep records in which the content of KEY matches the regular expression. | -| Exclude | KEY REGEX | Exclude records in which the content of KEY matches the regular expression. | -| Logical_Op| Operation | Specify which logical operator to use. `AND` , `OR` and `legacy` are allowed as an Operation. Default is `legacy` for backward compatibility. In `legacy` mode the behaviour is either AND or OR depending whether the `grep` is including (uses AND) or excluding (uses OR). Only available from 2.1+. | +| Key | Value Format | Description | +| ------------ | ------------ | ----------- | +| `Regex` | KEY REGEX | Keep records where the content of KEY matches the regular expression. | +| `Exclude` | KEY REGEX | Exclude records where the content of KEY matches the regular expression. | +| `Logical_Op` | Operation | Specify a logical operator: `AND`, `OR` or `legacy` (default). 
In `legacy` mode the behaviour is either `AND` or `OR` depending on whether the `grep` is including (uses AND) or excluding (uses OR). Available from 2.1 or higher. | -#### Record Accessor Enabled +### Record Accessor Enabled -This plugin enables the [Record Accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md) feature to specify the KEY. Using the _record accessor_ is suggested if you want to match values against nested values. +Enable the [Record Accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md) +feature to specify the KEY. Use the record accessor to match values against nested +values. -## Getting Started +## Filter records -In order to start filtering records, you can run the filter from the command line or through the configuration file. The following example assumes that you have a file called `lines.txt` with the following content: +To start filtering records, run the filter from the command line or through the +configuration file. The following example assumes that you have a file named +`lines.txt` with the following content: ```text {"log": "aaa"} @@ -35,20 +40,25 @@ In order to start filtering records, you can run the filter from the command lin {"log": "ggg"} ``` -### Command Line +### Command line -> Note: using the command line mode need special attention to quote the regular expressions properly. It's suggested to use a configuration file. +When using the command line, pay close attention to quote the regular expressions. +Using a configuration file might be easier. -The following command will load the _tail_ plugin and read the content of `lines.txt` file. Then the _grep_ filter will apply a regular expression rule over the _log_ field \(created by tail plugin\) and only _pass_ the records which field value starts with _aa_: +The following command loads the [tail](../../pipeline/inputs/tail) plugin and +reads the content of `lines.txt`. Then the `grep` filter applies a regular +expression rule over the `log` field created by the `tail` plugin and only passes +records with a field value starting with `aa`: ```text $ bin/fluent-bit -i tail -p 'path=lines.txt' -F grep -p 'regex=log aa' -m '*' -o stdout ``` -### Configuration File +### Configuration file {% tabs %} {% tab title="fluent-bit.conf" %} + ```python [SERVICE] parsers_file /path/to/parsers.conf @@ -67,9 +77,11 @@ $ bin/fluent-bit -i tail -p 'path=lines.txt' -F grep -p 'regex=log aa' -m '*' -o name stdout match * ``` + {% endtab %} {% tab title="fluent-bit.yaml" %} + ```yaml service: parsers_file: /path/to/parsers.conf @@ -87,14 +99,21 @@ pipeline: match: '*' ``` + {% endtab %} {% endtabs %} -The filter allows to use multiple rules which are applied in order, you can have many _Regex_ and _Exclude_ entries as required. +The filter lets you use multiple rules which are applied in order. You can +have as many `Regex` and `Exclude` entries as required. ### Nested fields example -If you want to match or exclude records based on nested values, you can use a [Record Accessor ](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md)format as the KEY name. Consider the following record example: +To match or exclude records based on nested values, you can use +[Record +Accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md) +format as the `KEY` name. 
+ +Consider the following record example: ```javascript { @@ -113,40 +132,45 @@ If you want to match or exclude records based on nested values, you can use a [R } ``` -if you want to exclude records that match given nested field \(for example `kubernetes.labels.app`\), you can use the following rule: +For example, to exclude records that match the nested field `kubernetes.labels.app`, +use the following rule: {% tabs %} {% tab title="fluent-bit.conf" %} + ```python [FILTER] Name grep Match * Exclude $kubernetes['labels']['app'] myapp ``` -{% endtab %} +{% endtab %} {% tab title="fluent-bit.yaml" %} + ```yaml filters: - name: grep match: '*' exclude: $kubernetes['labels']['app'] myapp ``` + {% endtab %} {% endtabs %} -### Excluding records missing/invalid fields - -It may be that in your processing pipeline you want to drop records that are missing certain keys. +### Excluding records with missing or invalid fields -A simple way to do this is just to `exclude` with a regex that matches anything, a missing key will fail this check. +You might want to drop records that are missing certain keys. -Here is an example that checks for a specific valid value for the key as well: +One way to do this is to `exclude` with a regex that matches anything. A missing +key fails this check. +The following example checks for a specific valid value for the key: {% tabs %} {% tab title="fluent-bit.conf" %} + -``` +```text # Use Grep to verify the contents of the iot_timestamp value. # If the iot_timestamp key does not exist, this will fail # and exclude the row. @@ -156,9 +180,10 @@ Here is an example that checks for a specific valid value for the key as well: Match iots_thread.* Regex iot_timestamp ^\d{4}-\d{2}-\d{2} ``` -{% endtab %} +{% endtab %} {% tab title="fluent-bit.yaml" %} + ```yaml filters: - name: grep @@ -166,20 +191,23 @@ Here is an example that checks for a specific valid value for the key as well: match: iots_thread.* regex: iot_timestamp ^\d{4}-\d{2}-\d{2} ``` + {% endtab %} {% endtabs %} -The specified key `iot_timestamp` must match the expected expression - if it does not or is missing/empty then it will be excluded. +The specified key `iot_timestamp` must match the expected expression. If it doesn't, +or is missing or empty, then it will be excluded. ### Multiple conditions -If you want to set multiple `Regex` or `Exclude`, you can use `Logical_Op` property to use logical conjuction or disjunction. - -Note: If `Logical_Op` is set, setting both 'Regex' and `Exclude` results in an error. +If you want to set multiple `Regex` or `Exclude`, use the `Logical_Op` property +to use a logical conjunction or disjunction. +If `Logical_Op` is set, setting both `Regex` and `Exclude` results in an error.
{% tabs %} {% tab title="fluent-bit.conf" %} + ```python [INPUT] Name dummy @@ -196,9 +224,11 @@ Note: If `Logical_Op` is set, setting both 'Regex' and `Exclude` results in an e [OUTPUT] Name stdout ``` + {% endtab %} {% tab title="fluent-bit.yaml" %} + ```yaml pipeline: inputs: @@ -215,11 +245,13 @@ pipeline: outputs: - name: stdout ``` + {% endtab %} {% endtabs %} -Output will be -``` +The output looks similar to: + +```text Fluent Bit v2.0.9 * Copyright (C) 2015-2022 The Fluent Bit Authors * Fluent Bit is a CNCF sub-project under the umbrella of Fluentd @@ -236,4 +268,4 @@ Fluent Bit v2.0.9 [2023/01/22 09:46:49] [ info] [output:stdout:stdout.0] worker #0 started [0] dummy: [1674348410.558341857, {"endpoint"=>"localhost", "value"=>"something"}] [0] dummy: [1674348411.546425499, {"endpoint"=>"localhost", "value"=>"something"}] -``` \ No newline at end of file +``` diff --git a/pipeline/filters/kubernetes.md b/pipeline/filters/kubernetes.md index 92a37583f..6d38cf1d7 100644 --- a/pipeline/filters/kubernetes.md +++ b/pipeline/filters/kubernetes.md @@ -9,10 +9,13 @@ When Fluent Bit is deployed in Kubernetes as a DaemonSet and configured to read * Namespace * Container Name * Container ID -* Query Kubernetes API Server to obtain extra metadata for the POD in question: +* Query Kubernetes API Server or Kubelet to obtain extra metadata for the POD in question: * Pod ID * Labels + * Owner References * Annotations + * Namespace Labels + * Namespace Annotations The data is cached locally in memory and appended to each record. @@ -35,23 +38,30 @@ The plugin supports the following configuration parameters: | Keep\_Log | When `Keep_Log` is disabled, the `log` field is removed from the incoming message once it has been successfully merged \(`Merge_Log` must be enabled as well\). | On | | tls.debug | Debug level between 0 \(nothing\) and 4 \(every detail\). | -1 | | tls.verify | When enabled, turns on certificate validation when connecting to the Kubernetes API server. | On | +| tls.verify\_hostname | When enabled, turns on hostname validation for certificates | Off | | Use\_Journal | When enabled, the filter reads logs coming in Journald format. | Off | | Cache\_Use\_Docker\_Id | When enabled, metadata will be fetched from K8s when docker\_id is changed. | Off | | Regex\_Parser | Set an alternative Parser to process record Tag and extract pod\_name, namespace\_name, container\_name and docker\_id. The parser must be registered in a [parsers file](https://github.com/fluent/fluent-bit/blob/master/conf/parsers.conf) \(refer to parser _filter-kube-test_ as an example\). | | | K8S-Logging.Parser | Allow Kubernetes Pods to suggest a pre-defined Parser \(read more about it in Kubernetes Annotations section\) | Off | | K8S-Logging.Exclude | Allow Kubernetes Pods to exclude their logs from the log processor \(read more about it in Kubernetes Annotations section\). | Off | -| Labels | Include Kubernetes resource labels in the extra metadata. | On | -| Annotations | Include Kubernetes resource annotations in the extra metadata. | On | +| Labels | Include Kubernetes pod resource labels in the extra metadata. | On | +| Annotations | Include Kubernetes pod resource annotations in the extra metadata. 
| On | | Kube\_meta\_preload\_cache\_dir | If set, Kubernetes meta-data can be cached/pre-loaded from files in JSON format in this directory, named as namespace-pod.meta | | | Dummy\_Meta | If set, use dummy-meta data \(for test/dev purposes\) | Off | | DNS\_Retries | DNS lookup retries N times until the network start working | 6 | | DNS\_Wait\_Time | DNS lookup interval between network status checks | 30 | -| Use\_Kubelet | this is an optional feature flag to get metadata information from kubelet instead of calling Kube Server API to enhance the log. This could mitigate the [Kube API heavy traffic issue for large cluster](kubernetes.md#optional-feature-using-kubelet-to-get-metadata). | Off | +| Use\_Kubelet | this is an optional feature flag to get metadata information from kubelet instead of calling Kube Server API to enhance the log. This could mitigate the [Kube API heavy traffic issue for large cluster](kubernetes.md#optional-feature-using-kubelet-to-get-metadata). If used when any [Kubernetes Namespace Meta](#kubernetes-namespace-meta) fields are enabled, Kubelet will be used to fetch pod data, but namespace meta will still be fetched using the `Kube_URL` settings.| Off | +| Use\_Tag\_For\_Meta | When enabled, Kubernetes metadata (e.g., pod_name, container_name, namespace_name etc) will be extracted from the tag itself. Connection to Kubernetes API Server will not get established and API calls for metadata won't be made. See [Workflow of Tail + Kubernetes Filter](#workflow-of-tail--kubernetes-filter) and [Custom Tag For Enhanced Filtering](#custom-tag-for-enhanced-filtering) to better understand metadata extraction from tags. | Off | | Kubelet\_Port | kubelet port using for HTTP request, this only works when `Use_Kubelet` set to On. | 10250 | | Kubelet\_Host | kubelet host using for HTTP request, this only works when `Use_Kubelet` set to On. | 127.0.0.1 | -| Kube\_Meta\_Cache\_TTL | configurable TTL for K8s cached metadata. By default, it is set to 0 which means TTL for cache entries is disabled and cache entries are evicted at random when capacity is reached. In order to enable this option, you should set the number to a time interval. For example, set this value to 60 or 60s and cache entries which have been created more than 60s will be evicted. | 0 | +| Kube\_Meta\_Cache\_TTL | configurable TTL for K8s cached pod metadata. By default, it is set to 0 which means TTL for cache entries is disabled and cache entries are evicted at random when capacity is reached. In order to enable this option, you should set the number to a time interval. For example, set this value to 60 or 60s and cache entries which have been created more than 60s will be evicted. | 0 | | Kube\_Token\_TTL | configurable 'time to live' for the K8s token. By default, it is set to 600 seconds. After this time, the token is reloaded from Kube_Token_File or the Kube_Token_Command.| 600 | | Kube\_Token\_Command | Command to get Kubernetes authorization token. By default, it will be `NULL` and we will use token file to get token. If you want to manually choose a command to get it, you can set the command here. For example, run `aws-iam-authenticator -i your-cluster-name token --token-only` to set token. This option is currently Linux-only. | | +| Kube\_Meta\_Namespace\_Cache\_TTL | configurable TTL for K8s cached namespace metadata. By default, it is set to 900 which means a 15min TTL for namespace cache entries. Setting this to 0 will mean entries are evicted at random once the cache is full. 
| 900 | +| Namespace\_Labels | Include Kubernetes namespace resource labels in the extra metadata. See [Kubernetes Namespace Meta](#kubernetes-namespace-meta)| Off | +| Namespace\_Annotations | Include Kubernetes namespace resource annotations in the extra metadata. See [Kubernetes Namespace Meta](#kubernetes-namespace-meta)| Off | +| Namespace\_Metadata\_Only | Include Kubernetes namespace metadata only and no pod metadata. If this is set, the values of `Labels` and `Annotations` are ignored. See [Kubernetes Namespace Meta](#kubernetes-namespace-meta)| Off | +| Owner\_References | Include Kubernetes owner references in the extra metadata | Off | ## Processing the 'log' value @@ -76,7 +86,24 @@ To perform processing of the _log_ key, it's **mandatory to enable** the _Merge\ If _log_ value processing fails, the value is untouched. The order above is not chained, meaning it's exclusive and the filter will try only one of the options above, **not** all of them. -## Kubernetes Annotations +## Kubernetes Namespace Meta + +Namespace Meta can be enabled via the following settings: + +* Namespace\_Labels +* Namespace\_Annotations + +Using any Namespace Meta requires the use of the Kube API as it cannot be fetched directly from Kubelet. If `Use_Kubelet On` has been set, the Kubelet api will only be used to fetch pod metadata, while namespace meta is fetched from the upstream Kubernetes API. + +Namespace Meta, if collected, will be stored within a `kubernetes_namespace` record key. + +> Namespace meta is not guaranteed to be in sync as namespace labels & annotations can be adjusted after pod creation. Adjust `Kube_Meta_Namespace_Cache_TTL` to lower caching times to fit your use case. + +* Namespace\_Metadata\_Only + * Using this feature will instruct fluent-bit to only fetch namespace metadata and to not fetch POD metadata at all. POD basic metadata like container id, host, etc will NOT be added and the Labels and Annotations configuration options which are used specifically for POD Metadata will be ignored. + +## Kubernetes Pod Annotations A flexible feature of Fluent Bit Kubernetes filter is that allow Kubernetes Pods to suggest certain behaviors for the log processor pipeline when processing the records. At the moment it support: @@ -132,6 +159,14 @@ spec: Note that the annotation value is boolean which can take a _true_ or _false_ and **must** be quoted. +## Kubernetes Owner References + +An opt-in feature of the Fluent Bit Kubernetes filter includes owner references information under the `kubernetes.ownerReferences` field in the record when enabled. An example record is shown below. + +``` +"kubernetes"=>{"pod_name"=>"fluentbit-gke-2p6b5", "namespace_name"=>"kube-system", "pod_id"=>"c759a5f5-xxxx-xxxx-9117-8a1dc0b1f907", "labels"=>{"component"=>"xxxx", "controller-revision-hash"=>"77665fff9", "k8s-app"=>"fluentbit-xxxx"}, "ownerReferences"=>[{"apiVersion"=>"apps/v1", "kind"=>"DaemonSet", "name"=>"fluentbit-gke", "uid"=>"1a12c3e2-d6c4-4a8a-b877-dd3c857d1aea", "controller"=>true, "blockOwnerDeletion"=>true}], "host"=>"xxx-2a9c049c-qgw3", "pod_ip"=>"10.128.0.111", "container_name"=>"fluentbit", "docker_id"=>"2accxxx", "container_hash"=>"xxxx", "container_image"=>"sha256:5163dxxxxea2"}} +``` + ## Workflow of Tail + Kubernetes Filter Kubernetes Filter depends on either [Tail](../inputs/tail.md) or [Systemd](../inputs/systemd.md) input plugins to process and enrich records with Kubernetes metadata.
Here we will explain the workflow of Tail and how it configuration is correlated with Kubernetes filter. Consider the following configuration example \(just for demo purposes, not production\): @@ -141,7 +176,7 @@ Kubernetes Filter depends on either [Tail](../inputs/tail.md) or [Systemd](../in Name tail Tag kube.* Path /var/log/containers/*.log - Parser docker + multiline.parser docker, cri [FILTER] Name kubernetes @@ -202,10 +237,37 @@ You can see on [Rublar.com](https://rubular.com/r/HZz3tYAahj6JCd) web site how t * [https://rubular.com/r/HZz3tYAahj6JCd](https://rubular.com/r/HZz3tYAahj6JCd) -#### Custom Regex +### Custom Regex Under certain and not common conditions, a user would want to alter that hard-coded regular expression, for that purpose the option **Regex\_Parser** can be used \(documented on top\). +#### Custom Tag For Enhanced Filtering + +One such use case involves splitting logs by namespace, pods, containers or container id. +The tag is restructured within the tail input using match groups, this can simplify the filtering by those match groups later in the pipeline. +Since the tag no longer follows the original file name, a custom **Regex\_Parser** that matches the new tag structure is required: + +```text +[PARSER] + Name custom-tag + Format regex + Regex ^(?<namespace_name>[^_]+)\.(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)\.(?<container_name>.+)\.(?<container_id>[a-z0-9]{64}) + +[INPUT] + Name tail + Tag kube.<namespace_name>.<pod_name>.<container_name>.<container_id> + Path /var/log/containers/*.log + Tag_Regex (?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<container_id>[a-z0-9]{64})\.log$ + Parser cri + +[FILTER] + Name kubernetes + Match kube.* + Kube_Tag_Prefix kube. + Regex_Parser custom-tag + Merge_Log On +``` + #### Final Comments So at this point the filter is able to gather the values of _pod\_name_ and _namespace_, with that information it will check in the local cache \(internal hash table\) if some metadata for that key pair exists, if so, it will enrich the record with the metadata value, otherwise it will connect to the Kubernetes Master/API Server and retrieve that information. @@ -220,7 +282,7 @@ There are some configuration setup needed for this feature. 
Role Configuration for Fluent Bit DaemonSet Example: -```text +```yaml --- apiVersion: v1 kind: ServiceAccount @@ -239,7 +301,7 @@ rules: - pods - nodes - nodes/proxy - verbs: + verbs: - get - list - watch @@ -263,34 +325,34 @@ The difference is that kubelet need a special permission for resource `nodes/pro Fluent Bit Configuration Example: ```text - [INPUT] - Name tail - Tag kube.* - Path /var/log/containers/*.log - DB /var/log/flb_kube.db - Parser docker - Docker_Mode On - Mem_Buf_Limit 50MB - Skip_Long_Lines On - Refresh_Interval 10 - - [FILTER] - Name kubernetes - Match kube.* - Kube_URL https://kubernetes.default.svc.cluster.local:443 - Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt - Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token - Merge_Log On - Buffer_Size 0 - Use_Kubelet true - Kubelet_Port 10250 +[INPUT] + Name tail + Tag kube.* + Path /var/log/containers/*.log + DB /var/log/flb_kube.db + Parser docker + Docker_Mode On + Mem_Buf_Limit 50MB + Skip_Long_Lines On + Refresh_Interval 10 + +[FILTER] + Name kubernetes + Match kube.* + Kube_URL https://kubernetes.default.svc.cluster.local:443 + Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token + Merge_Log On + Buffer_Size 0 + Use_Kubelet true + Kubelet_Port 10250 ``` So for fluent bit configuration, you need to set the `Use_Kubelet` to true to enable this feature. DaemonSet config Example: -```text +```yaml --- apiVersion: apps/v1 kind: DaemonSet @@ -384,19 +446,23 @@ If you are not seeing metadata added to your kubernetes logs and see the followi When Fluent Bit is deployed as a DaemonSet it generally runs with specific roles that allow the application to talk to the Kubernetes API server. If you are deployed in a more restricted environment check that all the Kubernetes roles are set correctly. You can test this by running the following command (replace `fluentbit-system` with the namespace where your fluentbit is installed) + ```text kubectl auth can-i list pods --as=system:serviceaccount:fluentbit-system:fluentbit ``` -If set roles are configured correctly, it should simply respond with `yes`. -For instance, using Azure AKS, running the above command may respond with: +If set roles are configured correctly, it should simply respond with `yes`. + +For instance, using Azure AKS, running the above command may respond with: + ```text no - Azure does not have opinion for this user. ``` -If you have connectivity to the API server, but still "could not get meta for POD" - debug logging might give you a message with `Azure does not have opinion for this user`. Then the following `subject` may need to be included in the `fluentbit` `ClusterRoleBinding`: +If you have connectivity to the API server, but still "could not get meta for POD" - debug logging might give you a message with `Azure does not have opinion for this user`. Then the following `subject` may need to be included in the `fluentbit` `ClusterRoleBinding`: appended to `subjects` array: + ```yaml - apiGroup: rbac.authorization.k8s.io kind: Group @@ -415,3 +481,8 @@ By default the Kube\_URL is set to `https://kubernetes.default.svc:443` . Ensure In some cases, you may only see some objects being appended with metadata while other objects are not enriched. This can occur at times when local data is cached and does not contain the correct id for the kubernetes object that requires enrichment. 
For most Kubernetes objects the Kubernetes API server is updated which will then be reflected in Fluent Bit logs, however in some cases for `Pod` objects this refresh to the Kubernetes API server can be skipped, causing metadata to be skipped. +## Credit + +Our Kubernetes Filter plugin is fully inspired by the [Fluentd Kubernetes Metadata +Filter](https://github.com/fabric8io/fluent-plugin-kubernetes\_metadata\_filter) +written by [Jimmi Dyson](https://github.com/jimmidyson). diff --git a/pipeline/filters/log_to_metrics.md b/pipeline/filters/log_to_metrics.md index 5dbc017d7..82bc245bc 100644 --- a/pipeline/filters/log_to_metrics.md +++ b/pipeline/filters/log_to_metrics.md @@ -2,36 +2,41 @@ description: Generate metrics from logs --- -# Log To Metrics +# Log to Metrics + + + +## Log To Metrics The _Log To Metrics Filter_ plugin allows you to generate log-derived metrics. It currently supports modes to count records, provide a gauge for field values or create a histogram. You can also match or exclude specific records based on regular expression patterns for values or nested values. This filter plugin does not actually act as a record filter and does not change or drop records. All records will pass this filter untouched and generated metrics will be emitted into a seperate metric pipeline. _Please note that this plugin is an experimental feature and is not recommended for production use. Configuration parameters and plugin functionality are subject to change without notice._ - -## Configuration Parameters +### Configuration Parameters The plugin supports the following configuration parameters: -| Key | Description | Mandatory | Value Format -| :--- | :--- | :--- | :--- -| tag | Defines the tag for the generated metrics record| Yes | | -| metric_mode | Defines the mode for the metric. Valid values are [`counter`, `gauge` or `histogram`] | Yes | | -| metric_name | Sets the name of the metric. | Yes | | -| metric_description | Sets a help text for the metric. | Yes | | -| bucket | Defines a bucket for `histogram` | Yes, for mode `histogram` | e.g. 0.75 | -| add_label | Add a custom label NAME and set the value to the value of KEY | | | NAME KEY | -| label_field | Includes a record field as label dimension in the metric. | | Name of record key. Supports [Record Accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md) notation for nested fields. -| value_field | Specify the record field that holds a numerical value | Yes, for modes [`gauge` and `histogram`] | Name of record key. Supports [Record Accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md) notation for nested fields. -| kubernetes_mode | If enabled, it will automatically put pod_id, pod_name, namespace_name, docker_id and container_name into the metric as labels. This option is intended to be used in combination with the [kubernetes](./kubernetes.md) filter plugin, which fills those fields. | | -| Regex | Include records in which the content of KEY matches the regular expression. | | KEY REGEX -| Exclude | Exclude records in which the content of KEY matches the regular expression. 
| | KEY REGEX - -## Getting Started +| Key | Description | Mandatory | Value Format | +| --------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- | +| tag | Defines the tag for the generated metrics record | Yes | | +| metric\_mode | Defines the mode for the metric. Valid values are \[`counter`, `gauge` or `histogram`] | Yes | | +| metric\_name | Sets the name of the metric. | Yes | | +| metric\_description | Sets a help text for the metric. | Yes | | +| bucket | Defines a bucket for `histogram` | Yes, for mode `histogram` | e.g. 0.75 | +| add\_label | Add a custom label NAME and set the value to the value of KEY | | | +| label\_field | Includes a record field as label dimension in the metric. | | Name of record key. Supports [Record Accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md) notation for nested fields. | +| value\_field | Specify the record field that holds a numerical value | Yes, for modes \[`gauge` and `histogram`] | Name of record key. Supports [Record Accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md) notation for nested fields. | +| kubernetes\_mode | If enabled, it will automatically put pod\_id, pod\_name, namespace\_name, docker\_id and container\_name into the metric as labels. This option is intended to be used in combination with the [kubernetes](kubernetes.md) filter plugin, which fills those fields. | | | +| Regex | Include records in which the content of KEY matches the regular expression. | | KEY REGEX | +| Exclude | Exclude records in which the content of KEY matches the regular expression. | | KEY REGEX | +| Flush\_Interval\_Sec | The interval for metrics emission, in seconds. If **Flush\_Interval\_Sec** and **Flush\_Interval\_Nsec** are either both unset or both set to `0`, the filter emits metrics immediately after each filter match. Otherwise, if either parameter is set to a non-zero value, the filter emits metrics at the specified interval. Longer intervals help lower resource consumption in high-load situations. Default value: `0`. | | | +| Flush\_Interval\_Nsec | The interval for metrics emission, in nanoseconds. This parameter works in conjunction with **Flush\_Interval\_Sec**. Default value: `0`. | | | + +### Getting Started The following example takes records from two dummy inputs and counts all messages passing through the `log_to_metrics` filter. It then generates metric records which are provided to the `prometheus_exporter`: -### Configuration - Counter +#### Configuration - Counter ```python [SERVICE] @@ -64,7 +69,8 @@ The following example takes records from two dummy inputs and counts all message ``` You can then use e.g. curl command to retrieve the generated metric: -```text + +``` > curl -s http://127.0.0.1:2021/metrics @@ -73,9 +79,10 @@ You can then use e.g. 
curl command to retrieve the generated metric: log_metric_counter_count_all_dummy_messages 49 ``` -### Configuration - Gauge +#### Configuration - Gauge The `gauge` mode needs a `value_field` specified, where the current metric values are generated from. In this example we also apply a regex filter and enable the `kubernetes_mode` option: + ```python [FILTER] name log_to_metrics @@ -91,8 +98,10 @@ The `gauge` mode needs a `value_field` specified, where the current metric value label_field color label_field shape ``` + You can then use e.g. curl command to retrieve the generated metric: -```text + +``` > curl -s http://127.0.0.1:2021/metrics @@ -103,16 +112,17 @@ log_metric_gauge_current_duration{namespace_name="default",pod_name="pod1",conta As you can see in the output, only one line is printed, as the records from the first input plugin are ignored, as they do not match the regex. -The filter also allows to use multiple rules which are applied in order, you can have many _Regex_ and _Exclude_ entries as required (see [grep](./grep.md) filter plugin). +The filter also allows to use multiple rules which are applied in order, you can have many _Regex_ and _Exclude_ entries as required (see [grep](grep.md) filter plugin). If you execute the above `curl` command multiple times, you see, that in this example the metric value stays at `60`, as the messages generated by the dummy plugin are not changing. In a real-world scenario the values would change and return the last processed value. +**Metric label\_values** -#### Metric label_values As you can see, the label sets defined by `add_label` and `label_field` are added to the metric. The lines in the metric represent every combination of labels. Only actually used combinations are displayed here. To see this, you can add a dummy `dummy` input to your configuration. The metric output would then look like: -```text + +``` > curl -s http://127.0.0.1:2021/metrics # HELP log_metric_gauge_current_duration This metric shows the current duration @@ -122,11 +132,12 @@ log_metric_gauge_current_duration{namespace_name="default",pod_name="pod1",conta ``` -You can also see, that all the kubernetes labels have been attached to the metric, accordingly. +You can also see, that all the kubernetes labels have been attached to the metric, accordingly. -### Configuration - Histogram +#### Configuration - Histogram Similar to the `gauge` mode, `histogram` needs a `value_field` specified, where the current metric values are generated from. In this example we also apply a regex filter and enable the `kubernetes_mode` option: + ```python [FILTER] name log_to_metrics @@ -142,8 +153,10 @@ Similar to the `gauge` mode, `histogram` needs a `value_field` specified, where label_field color label_field shape ``` + You can then use e.g. curl command to retrieve the generated metric: -```text + +``` > curl -s http://127.0.0.1:2021/metrics @@ -206,5 +219,4 @@ As you can see in the output, there are per default the buckets `0.005, 0.01, 0. Please note, that the `+Inf` bucket will always be included implicitly. The buckets in a histogram are cumulative, so a value added to one bucket will add to all larger buckets, too. - -You can also see, that all the kubernetes labels have been attached to the metric, idential to the behavior of `label_field` described in [the previous chapter](#metric-label_values). That results in two sets for the histogram. \ No newline at end of file +This filter also attaches Kubernetes labels to each metric, identical to the behavior of `label_field`. 
This results in two sets for the histogram. diff --git a/pipeline/filters/lua.md b/pipeline/filters/lua.md index 2ef8e9872..f7d2745eb 100644 --- a/pipeline/filters/lua.md +++ b/pipeline/filters/lua.md @@ -2,6 +2,8 @@ The **Lua** filter allows you to modify the incoming records (even split one record into multiple records) using custom [Lua](https://www.lua.org/) scripts. +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=f519378e-536c-4b25-8949-ee6ed8d8d6c1" /> + Due to the necessity to have a flexible filtering mechanism, it is now possible to extend Fluent Bit capabilities by writing custom filters using Lua programming language. A Lua-based filter takes two steps: 1. Configure the Filter in the main configuration @@ -38,6 +40,8 @@ $ fluent-bit -i dummy -F lua -p script=test.lua -p call=cb_print -m '*' -o null In your main configuration file append the following _Input_, _Filter_ & _Output_ sections: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```python [INPUT] Name dummy @@ -52,6 +56,25 @@ In your main configuration file append the following _Input_, _Filter_ & _Output Name null Match * ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: dummy + filters: + - name: lua + match: '*' + script: test.lua + call: cb_print + outputs: + - name: null + match: '*' +``` +{% endtab %} +{% endtabs %} + ## Lua Script Filter API <a id="lua_script"></a> @@ -91,16 +114,38 @@ Each callback **must** return three values: | timestamp | double | If code equals 1, the original record timestamp will be replaced with this new value. | | record | table | If code equals 1, the original record information will be replaced with this new value. Note that the _record_ value **must** be a valid Lua table. This value can be an array of tables (i.e., array of objects in JSON format), and in that case the input record is effectively split into multiple records. (see below for more details) | -### Code Examples +## Features -For functional examples of this interface, please refer to the code samples provided in the source code of the project located here: +### Inline configuration -[https://github.com/fluent/fluent-bit/tree/master/scripts](https://github.com/fluent/fluent-bit/tree/master/scripts) +The [Fluent Bit smoke tests](https://github.com/fluent/fluent-bit/tree/master/packaging/testing/smoke/container) include examples to verify during CI. -#### Inline configuration +{% tabs %} +{% tab title="fluent-bit.conf" %} +``` +[SERVICE] + flush 1 + daemon off + log_level debug -The [Fluent Bit smoke tests](https://github.com/fluent/fluent-bit/tree/master/packaging/testing/smoke/container) include examples to verify during CI. +[INPUT] + Name random + Tag test + Samples 10 + +[FILTER] + Name Lua + Match * + call append_tag + code function append_tag(tag, timestamp, record) new_record = record new_record["tag"] = tag return 1, timestamp, new_record end +[OUTPUT] + Name stdout + Match * +``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} ```yaml service: flush: 1 @@ -128,32 +173,28 @@ pipeline: - name: stdout match: "*" ``` +{% endtab %} +{% endtabs %} -In classic mode: +### Number Type -``` -[SERVICE] - flush 1 - daemon off - log_level debug +Lua treats numbers as a `double` type, which means an `integer` type +containing data like user IDs and log levels will be converted to a `double`. +To avoid type conversion, use the `type_int_key` property. 
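As a minimal sketch of how `type_int_key` can be applied (the record keys `user_id` and `log_level` are hypothetical, this reuses the `test.lua`/`cb_print` names from the earlier examples, and it assumes the property accepts a space-delimited list of key names), a classic-mode filter entry might look like:

```text
[FILTER]
    Name          lua
    Match         *
    Script        test.lua
    Call          cb_print
    # Hypothetical keys that should keep their integer type instead of
    # being converted to doubles by the Lua runtime
    type_int_key  user_id log_level
```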
-[INPUT] - Name random - Tag test - Samples 10 +### Protected Mode -[FILTER] - Name Lua - Match * - call append_tag - code function append_tag(tag, timestamp, record) new_record = record new_record["tag"] = tag return 1, timestamp, new_record end +Fluent Bit supports protected mode to prevent crashes if it executes an invalid Lua script. +See [Error Handling in Application Code](https://www.lua.org/pil/24.3.1.html) in +the Lua documentation for more information. + + +## Code Examples + +For functional examples of this interface, please refer to the code samples provided in the source code of the project located here: -[OUTPUT] - Name stdout - Match * -``` -#### Environment variable processing +### Processing environment variables As an example that combines a bit of LUA processing with the [Kubernetes filter](./kubernetes.md) that demonstrates using environment variables with LUA regex and substitutions. @@ -166,15 +207,33 @@ The environment variable is set like so: We want to extract the `sandboxbsh` name and add it to our record as a special key. +{% tabs %} +{% tab title="fluent-bit.conf" %} ``` - [FILTER] - Name lua - Alias filter-iots-lua - Match iots_thread.* - Script filters.lua - Call set_landscape_deployment +[FILTER] +Name lua +Alias filter-iots-lua +Match iots_thread.* +Script filters.lua +Call set_landscape_deployment +``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml + filters: + - name: lua + alias: filter-iots-lua + match: iots_thread.* + script: filters.lua + call: set_landscape_deployment +``` +{% endtab %} +{% endtabs %} - filters.lua: | + +filters.lua: +```lua -- Use a Lua function to create some additional entries based -- on substrings from the kubernetes properties. function set_landscape_deployment(tag, timestamp, record) @@ -199,14 +258,6 @@ We want to extract the `sandboxbsh` name and add it to our record as a special k end ``` -### Number Type - -+Lua treats number as double. It means an integer field (e.g. IDs, log levels) will be converted double. To avoid type conversion, The `type_int_key` property is available. - -### Protected Mode - -Fluent Bit supports protected mode to prevent crash when executes invalid Lua script. See also [Error Handling in Application Code](https://www.lua.org/pil/24.3.1.html). - ### Record Split The Lua callback function can return an array of tables (i.e., array of records) in its third _record_ return value. With this feature, the Lua filter can split one input record into multiple records according to custom logic. @@ -227,6 +278,8 @@ end #### Configuration +{% tabs %} +{% tab title="fluent-bit.conf" %} ```python [Input] Name stdin @@ -241,6 +294,24 @@ end Name stdout Match * ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: stdin + filters: + - name: lua + match: '*' + script: test.lua + call: cb_split + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} #### Input @@ -266,7 +337,7 @@ See also [Fluent Bit: PR 811](https://github.com/fluent/fluent-bit/pull/811). ### Response code filtering -In this example, we want to filter istio logs to exclude lines with response codes between 1 and 399. +In this example, we want to filter Istio logs to exclude lines with response codes between 1 and 399. Istio is configured to write the logs in json format. #### Lua script @@ -288,27 +359,52 @@ end #### Configuration -Configuration to get istio logs and apply response code filter to them. +Configuration to get Istio logs and apply response code filter to them. 
+{% tabs %} +{% tab title="fluent-bit.conf" %} ```ini - [INPUT] - Name tail - Path /var/log/containers/*_istio-proxy-*.log - multiline.parser docker, cri - Tag istio.* - Mem_Buf_Limit 64MB - Skip_Long_Lines Off +[INPUT] + Name tail + Path /var/log/containers/*_istio-proxy-*.log + multiline.parser docker, cri + Tag istio.* + Mem_Buf_Limit 64MB + Skip_Long_Lines Off + +[FILTER] + Name lua + Match istio.* + Script response_code_filter.lua + call cb_response_code_filter - [FILTER] - Name lua - Match istio.* - Script response_code_filter.lua - call cb_response_code_filter +[Output] + Name stdout + Match * +``` +{% endtab %} - [Output] - Name stdout - Match * +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: tail + path: /var/log/containers/*_istio-proxy-*.log + multiline.parser: 'docker, cri' + tag: istio.* + mem_buf_limit: 64MB + skip_long_lines: off + filters: + - name: lua + match: istio.* + script: response_code_filter.lua + call: cb_response_code_filter + outputs: + - name: stdout + match: '*' ``` +{% endtab %} +{% endtabs %} #### Input @@ -348,3 +444,185 @@ Configuration to get istio logs and apply response code filter to them. #### Output In the output only the messages with response code 0 or greater than 399 are shown. + +### Time format Conversion + +The following example converts a field's specific type of `datetime` format to +`utc ISO 8601` format. + +#### Lua script + +Script `custom_datetime_format.lua` + +```lua +function convert_to_utc(tag, timestamp, record) + local date_time = record["pub_date"] + local new_record = record + if date_time then + if string.find(date_time, ",") then + local pattern = "(%a+, %d+ %a+ %d+ %d+:%d+:%d+) ([+-]%d%d%d%d)" + local date_part, zone_part = date_time:match(pattern) + + if date_part and zone_part then + local command = string.format("date -u -d '%s %s' +%%Y-%%m-%%dT%%H:%%M:%%SZ", date_part, zone_part) + local handle = io.popen(command) + local result = handle:read("*a") + handle:close() + new_record["pub_date"] = result:match("%S+") + end + end + end + return 1, timestamp, new_record +end +``` + +#### Configuration + +Use this configuration to obtain a JSON key with `datetime`, and then convert it to +another format. 
+ +{% tabs %} +{% tab title="fluent-bit.conf" %} +```ini +[INPUT] + Name dummy + Dummy {"event": "Restock", "pub_date": "Tue, 30 Jul 2024 18:01:06 +0000"} + Tag event_category_a + +[INPUT] + Name dummy + Dummy {"event": "Soldout", "pub_date": "Mon, 29 Jul 2024 10:15:00 +0600"} + Tag event_category_b + + +[FILTER] + Name lua + Match * + Script custom_datetime_format.lua + call convert_to_utc + +[Output] + Name stdout + Match * +``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"event": "Restock", "pub_date": "Tue, 30 Jul 2024 18:01:06 +0000"}' + tag: event_category_a + + - name: dummy + dummy: '{"event": "Soldout", "pub_date": "Mon, 29 Jul 2024 10:15:00 +0600"}' + tag: event_category_b + + filters: + - name: lua + match: '*' + code: | + function convert_to_utc(tag, timestamp, record) + local date_time = record["pub_date"] + local new_record = record + if date_time then + if string.find(date_time, ",") then + local pattern = "(%a+, %d+ %a+ %d+ %d+:%d+:%d+) ([+-]%d%d%d%d)" + local date_part, zone_part = date_time:match(pattern) + if date_part and zone_part then + local command = string.format("date -u -d '%s %s' +%%Y-%%m-%%dT%%H:%%M:%%SZ", date_part, zone_part) + local handle = io.popen(command) + local result = handle:read("*a") + handle:close() + new_record["pub_date"] = result:match("%S+") + end + end + end + return 1, timestamp, new_record + end + call: convert_to_utc + + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} + +#### Input + +```json +{"event": "Restock", "pub_date": "Tue, 30 Jul 2024 18:01:06 +0000"} +``` +and + +```json +{"event": "Soldout", "pub_date": "Mon, 29 Jul 2024 10:15:00 +0600"} +``` +Which are handled by dummy in this example. + +#### Output + +The output of this process shows the conversion of the `datetime` of two timezones to +`ISO 8601` format in `UTC`. + +```ini +... +[2024/08/01 00:56:25] [ info] [output:stdout:stdout.0] worker #0 started +[0] event_category_a: [[1722452186.727104902, {}], {"event"=>"Restock", "pub_date"=>"2024-07-30T18:01:06Z"}] +[0] event_category_b: [[1722452186.730255842, {}], {"event"=>"Soldout", "pub_date"=>"2024-07-29T04:15:00Z"}] +... +``` + +### Using configuration variables + +Fluent Bit supports definition of configuration variables, which can be done in the following way: + +```yaml +env: + myvar1: myvalue1 +``` + +These variables can be accessed from the Lua code by referring to the FLB_ENV Lua table. +Being this a Lua table, the subrecords can be accessed following the same syntax, i.e. `FLB_ENV['A']`. + +#### Configuration + +```yaml +env: + A: aaa + B: bbb + C: ccc + +service: + flush: 1 + log_level: info + +pipeline: + inputs: + - name: random + tag: test + samples: 10 + + filters: + - name: lua + match: "*" + call: append_tag + code: | + function append_tag(tag, timestamp, record) + new_record = record + new_record["my_env"] = FLB_ENV + return 1, timestamp, new_record + end + + outputs: + - name: stdout + match: "*" +``` + +#### Output + +``` +test: [[1731990257.781970977, {}], {"my_env"=>{"A"=>"aaa", "C"=>"ccc", "HOSTNAME"=>"monox-2.lan", "B"=>"bbb"}, "rand_value"=>4805047635809401856}] +``` diff --git a/pipeline/filters/nest.md b/pipeline/filters/nest.md index 96990ca81..b0262d324 100644 --- a/pipeline/filters/nest.md +++ b/pipeline/filters/nest.md @@ -1,15 +1,16 @@ # Nest -The _Nest Filter_ plugin allows you to operate on or with nested data. Its modes of operation are +The _Nest Filter_ plugin lets you operate on or with nested data. 
Its modes of operation are: -* `nest` - Take a set of records and place them in a map -* `lift` - Take a map by key and lift its records up +- `nest` - Take a set of records and place them in a map. +- `lift` - Take a map by key and lift its records up. -## Example usage \(nest\) +## Example usage for `nest` -As an example using JSON notation, to nest keys matching the `Wildcard` value `Key*` under a new key `NestKey` the transformation becomes, +As an example using JSON notation, to nest keys matching the `Wildcard` value `Key*` +under a new key `NestKey` the transformation becomes: -_Example \(input\)_ +Input: ```text { @@ -19,7 +20,7 @@ _Example \(input\)_ } ``` -_Example \(output\)_ +Output: ```text { @@ -31,11 +32,12 @@ _Example \(output\)_ } ``` -## Example usage \(lift\) +## Example usage for `lift` -As an example using JSON notation, to lift keys nested under the `Nested_under` value `NestKey*` the transformation becomes, +As an example using JSON notation, to lift keys nested under the `Nested_under` value +`NestKey*` the transformation becomes: -_Example \(input\)_ +Input: ```text { @@ -47,7 +49,7 @@ _Example \(input\)_ } ``` -_Example \(output\)_ +Output: ```text { @@ -61,40 +63,47 @@ _Example \(output\)_ The plugin supports the following configuration parameters: -| Key | Value Format | Operation | Description | +| Key | Value format | Operation | Description | | :--- | :--- | :--- | :--- | -| Operation | ENUM \[`nest` or `lift`\] | | Select the operation `nest` or `lift` | -| Wildcard | FIELD WILDCARD | `nest` | Nest records which field matches the wildcard | -| Nest\_under | FIELD STRING | `nest` | Nest records matching the `Wildcard` under this key | -| Nested\_under | FIELD STRING | `lift` | Lift records nested under the `Nested_under` key | -| Add\_prefix | FIELD STRING | ANY | Prefix affected keys with this string | -| Remove\_prefix | FIELD STRING | ANY | Remove prefix from affected keys if it matches this string | +| `Operation` | ENUM [`nest` or `lift`] | | Select the operation `nest` or `lift` | +| `Wildcard` | FIELD WILDCARD | `nest` | Nest records which field matches the wildcard | +| `Nest_under` | FIELD STRING | `nest` | Nest records matching the `Wildcard` under this key | +| `Nested_under` | FIELD STRING | `lift` | Lift records nested under the `Nested_under` key | +| `Add_prefix` | FIELD STRING | ANY | Prefix affected keys with this string | +| `Remove_prefix` | FIELD STRING | ANY | Remove prefix from affected keys if it matches this string | ## Getting Started -In order to start filtering records, you can run the filter from the command line or through the configuration file. The following invokes the [Memory Usage Input Plugin](../inputs/memory-metrics.md), which outputs the following \(example\), +To start filtering records, run the filter from the command line or through the +configuration file. The following example invokes the +[Memory Usage Input Plugin](../inputs/memory-metrics.md), which outputs the +following: ```text [0] memory: [1488543156, {"Mem.total"=>1016044, "Mem.used"=>841388, "Mem.free"=>174656, "Swap.total"=>2064380, "Swap.used"=>139888, "Swap.free"=>1924492}] ``` -## Example \#1 - nest +## Example 1 - nest ### Command Line -> Note: Using the command line mode requires quotes parse the wildcard properly. The use of a configuration file is recommended. +Using command line mode requires quotes to parse the wildcard properly. The use +of a configuration file is recommended. -The following command will load the _mem_ plugin. 
Then the _nest_ filter will match the wildcard rule to the keys and nest the keys matching `Mem.*` under the new key `NEST`. +The following command loads the _mem_ plugin. Then the _nest_ filter matches the +wildcard rule to the keys and nests the keys matching `Mem.*` under the new key +`NEST`. -```text -$ bin/fluent-bit -i mem -p 'tag=mem.local' -F nest -p 'Operation=nest' -p 'Wildcard=Mem.*' -p 'Nest_under=Memstats' -p 'Remove_prefix=Mem.' -m '*' -o stdout +```shell copy +bin/fluent-bit -i mem -p 'tag=mem.local' -F nest -p 'Operation=nest' -p 'Wildcard=Mem.*' -p 'Nest_under=Memstats' -p 'Remove_prefix=Mem.' -m '*' -o stdout ``` ### Configuration File {% tabs %} {% tab title="fluent-bit.conf" %} -```python + +```python copy [INPUT] Name mem Tag mem.local @@ -111,10 +120,12 @@ $ bin/fluent-bit -i mem -p 'tag=mem.local' -F nest -p 'Operation=nest' -p 'Wildc Nest_under Memstats Remove_prefix Mem. ``` + {% endtab %} {% tab title="fluent-bit.yaml" %} -```yaml + +```yaml copy pipeline: inputs: - name: mem @@ -130,6 +141,7 @@ pipeline: - name: stdout match: '*' ``` + {% endtab %} {% endtabs %} @@ -142,15 +154,17 @@ The output of both the command line and configuration invocations should be iden [0] mem.local: [1522978514.007359767, {"Swap.total"=>1046524, "Swap.used"=>0, "Swap.free"=>1046524, "Memstats"=>{"total"=>4050908, "used"=>714984, "free"=>3335924}}] ``` -## Example \#2 - nest and lift undo +## Example 2 - nest and lift undo -This example nests all `Mem.*` and `Swap,*` items under the `Stats` key and then reverses these actions with a `lift` operation. The output appears unchanged. +This example nests all `Mem.*` and `Swap.*` items under the `Stats` key and then +reverses these actions with a `lift` operation. The output appears unchanged. -### Configuration File +### Example 2 Configuration File {% tabs %} {% tab title="fluent-bit.conf" %} -```python + +```python copy [INPUT] Name mem Tag mem.local @@ -175,10 +189,11 @@ This example nests all `Mem.*` and `Swap,*` items under the `Stats` key and then Nested_under Stats Remove_prefix NESTED ``` -{% endtab %} +{% endtab %} {% tab title="fluent-bit.yaml" %} -```yaml + +```yaml copy pipeline: inputs: - name: mem @@ -201,6 +216,7 @@ pipeline: - name: stdout match: '*' ``` + {% endtab %} {% endtabs %} @@ -211,15 +227,17 @@ pipeline: [0] mem.local: [1529566958.000940636, {"Mem.total"=>8053656, "Mem.used"=>6940380, "Mem.free"=>1113276, "Swap.total"=>16532988, "Swap.used"=>1286772, "Swap.free"=>15246216}] ``` -## Example \#3 - nest 3 levels deep +## Example 3 - nest 3 levels deep -This example takes the keys starting with `Mem.*` and nests them under `LAYER1`, which itself is then nested under `LAYER2`, which is nested under `LAYER3`. +This example takes the keys starting with `Mem.*` and nests them under `LAYER1`, +which is then nested under `LAYER2`, which is nested under `LAYER3`. 
-### Configuration File +### Example 3 Configuration File {% tabs %} {% tab title="fluent-bit.conf" %} -```python + +```python copy [INPUT] Name mem Tag mem.local @@ -249,10 +267,11 @@ This example takes the keys starting with `Mem.*` and nests them under `LAYER1`, Wildcard LAYER2* Nest_under LAYER3 ``` -{% endtab %} +{% endtab %} {% tab title="fluent-bit.yaml" %} -```yaml + +```yaml copy pipeline: inputs: - name: mem @@ -277,6 +296,7 @@ pipeline: - name: stdout match: '*' ``` + {% endtab %} {% endtabs %} @@ -302,15 +322,19 @@ pipeline: } ``` -## Example \#4 - multiple nest and lift filters with prefix +## Example 4 - multiple nest and lift filters with prefix -This example starts with the 3-level deep nesting of _Example 2_ and applies the `lift` filter three times to reverse the operations. The end result is that all records are at the top level, without nesting, again. One prefix is added for each level that is lifted. +This example uses the 3-level deep nesting of _Example 2_ and applies the +`lift` filter three times to reverse the operations. The end result is that all +records are at the top level, without nesting, again. One prefix is added for each +level that's lifted. ### Configuration file {% tabs %} {% tab title="fluent-bit.conf" %} -```python + +```python copy [INPUT] Name mem Tag mem.local @@ -361,10 +385,12 @@ This example starts with the 3-level deep nesting of _Example 2_ and applies the Nested_under Lifted3_Lifted2_LAYER1 Add_prefix Lifted3_Lifted2_Lifted1_ ``` + {% endtab %} {% tab title="fluent-bit.yaml" %} -```yaml + +```yaml copy pipeline: inputs: - name: mem @@ -404,23 +430,21 @@ pipeline: - name: stdout match: '*' ``` + {% endtab %} {% endtabs %} - ### Result ```text [0] mem.local: [1524862951.013414798, {"Swap.total"=>1046524, "Swap.used"=>0, "Swap.free"=>1046524, "Lifted3_Lifted2_Lifted1_Mem.total"=>4050908, "Lifted3_Lifted2_Lifted1_Mem.used"=>1253912, "Lifted3_Lifted2_Lifted1_Mem.free"=>2796996}] - { - "Swap.total"=>1046524, - "Swap.used"=>0, - "Swap.free"=>1046524, - "Lifted3_Lifted2_Lifted1_Mem.total"=>4050908, - "Lifted3_Lifted2_Lifted1_Mem.used"=>1253912, + "Swap.total"=>1046524, + "Swap.used"=>0, + "Swap.free"=>1046524, + "Lifted3_Lifted2_Lifted1_Mem.total"=>4050908, + "Lifted3_Lifted2_Lifted1_Mem.used"=>1253912, "Lifted3_Lifted2_Lifted1_Mem.free"=>2796996 } ``` - diff --git a/pipeline/filters/record-modifier.md b/pipeline/filters/record-modifier.md index 0a1f74572..dc64aea7c 100644 --- a/pipeline/filters/record-modifier.md +++ b/pipeline/filters/record-modifier.md @@ -1,24 +1,26 @@ # Record Modifier -The _Record Modifier Filter_ plugin allows to append fields or to exclude specific fields. +The _Record Modifier_ [filter](pipeline/filters.md) plugin lets you append +fields to a record, or exclude specific fields. -## Configuration Parameters +## Configuration parameters -The plugin supports the following configuration parameters: _Remove\_key_ and _Allowlist\_key_ are exclusive. +The plugin supports the following configuration parameters: | Key | Description | | :--- | :--- | -| Record | Append fields. This parameter needs key and value pair. | -| Remove\_key | If the key is matched, that field is removed. | -| Allowlist\_key | If the key is **not** matched, that field is removed. | -| Whitelist\_key | An alias of `Allowlist_key` for backwards compatibility. | -| Uuid\_key| If set, the plugin appends uuid to each record. The value assigned becomes the key in the map.| +| `Record` | Append fields. This parameter needs a key/value pair. 
| +| `Remove_key` | If the key is matched, that field is removed. You can this or `Allowlist_key`.| +| `Allowlist_key` | If the key isn't matched, that field is removed. You can this or `Remove_key`. | +| `Whitelist_key` | An alias of `Allowlist_key` for backwards compatibility. | +| `Uuid_key` | If set, the plugin appends Uuid to each record. The value assigned becomes the key in the map. | -## Getting Started +## Get started -In order to start filtering records, you can run the filter from the command line or through the configuration file. +To start filtering records, run the filter from the command line or through a +configuration file. -This is a sample in\_mem record to filter. +This is a sample `in_mem` record to filter. ```text {"Mem.total"=>1016024, "Mem.used"=>716672, "Mem.free"=>299352, "Swap.total"=>2064380, "Swap.used"=>32656, "Swap.free"=>2031724} @@ -26,11 +28,13 @@ This is a sample in\_mem record to filter. ### Append fields -The following configuration file is to append product name and hostname \(via environment variable\) to record. +The following configuration file appends a product name and hostname to a record +using an environment variable: {% tabs %} {% tab title="fluent-bit.conf" %} -```python + +```python copy [INPUT] Name mem Tag mem.local @@ -45,10 +49,12 @@ The following configuration file is to append product name and hostname \(via en Record hostname ${HOSTNAME} Record product Awesome_Tool ``` + {% endtab %} {% tab title="fluent-bit.yaml" %} -```yaml + +```yaml copy pipeline: inputs: - name: mem @@ -56,37 +62,37 @@ pipeline: filters: - name: record_modifier match: '*' - record: + record: - hostname ${HOSTNAME} - product Awesome_Tool outputs: - name: stdout match: '*' ``` + {% endtab %} {% endtabs %} +You can run the filter from command line: -You can also run the filter from command line. - -```text -$ fluent-bit -i mem -o stdout -F record_modifier -p 'Record=hostname ${HOSTNAME}' -p 'Record=product Awesome_Tool' -m '*' +```shell copy +fluent-bit -i mem -o stdout -F record_modifier -p 'Record=hostname ${HOSTNAME}' -p 'Record=product Awesome_Tool' -m '*' ``` -The output will be +The output looks something like: -```python +```python copy [0] mem.local: [1492436882.000000000, {"Mem.total"=>1016024, "Mem.used"=>716672, "Mem.free"=>299352, "Swap.total"=>2064380, "Swap.used"=>32656, "Swap.free"=>2031724, "hostname"=>"localhost.localdomain", "product"=>"Awesome_Tool"}] ``` -### Remove fields with Remove\_key - -The following configuration file is to remove 'Swap.\*' fields. +### Remove fields with `Remove_key` +The following configuration file removes `Swap.*` fields: {% tabs %} {% tab title="fluent-bit.conf" %} -```python + +```python copy [INPUT] Name mem Tag mem.local @@ -102,10 +108,12 @@ The following configuration file is to remove 'Swap.\*' fields. Remove_key Swap.used Remove_key Swap.free ``` + {% endtab %} {% tab title="fluent-bit.yaml" %} -```yaml + +```yaml copy pipeline: inputs: - name: mem @@ -113,7 +121,7 @@ pipeline: filters: - name: record_modifier match: '*' - remove_key: + remove_key: - Swap.total - Swap.used - Swap.free @@ -121,28 +129,30 @@ pipeline: - name: stdout match: '*' ``` + {% endtab %} {% endtabs %} You can also run the filter from command line. 
-```text -$ fluent-bit -i mem -o stdout -F record_modifier -p 'Remove_key=Swap.total' -p 'Remove_key=Swap.free' -p 'Remove_key=Swap.used' -m '*' +```shell copy +fluent-bit -i mem -o stdout -F record_modifier -p 'Remove_key=Swap.total' -p 'Remove_key=Swap.free' -p 'Remove_key=Swap.used' -m '*' ``` -The output will be +The output looks something like: ```python [0] mem.local: [1492436998.000000000, {"Mem.total"=>1016024, "Mem.used"=>716672, "Mem.free"=>295332}] ``` -### Remove fields with Allowlist\_key +### Retain fields with `Allowlist_key` -The following configuration file is to remain 'Mem.\*' fields. +The following configuration file retains `Mem.*` fields. {% tabs %} {% tab title="fluent-bit.conf" %} -```python + +```python copy [INPUT] Name mem Tag mem.local @@ -158,10 +168,12 @@ The following configuration file is to remain 'Mem.\*' fields. Allowlist_key Mem.used Allowlist_key Mem.free ``` + {% endtab %} {% tab title="fluent-bit.yaml" %} -```yaml + +```yaml copy pipeline: inputs: - name: mem @@ -169,7 +181,7 @@ pipeline: filters: - name: record_modifier match: '*' - Allowlist_key: + Allowlist_key: - Mem.total - Mem.used - Mem.free @@ -177,18 +189,18 @@ pipeline: - name: stdout match: '*' ``` + {% endtab %} {% endtabs %} -You can also run the filter from command line. +You can also run the filter from command line: -```text -$ fluent-bit -i mem -o stdout -F record_modifier -p 'Allowlist_key=Mem.total' -p 'Allowlist_key=Mem.free' -p 'Allowlist_key=Mem.used' -m '*' +```shell copy +fluent-bit -i mem -o stdout -F record_modifier -p 'Allowlist_key=Mem.total' -p 'Allowlist_key=Mem.free' -p 'Allowlist_key=Mem.used' -m '*' ``` -The output will be +The output looks something like: ```python [0] mem.local: [1492436998.000000000, {"Mem.total"=>1016024, "Mem.used"=>716672, "Mem.free"=>295332}] ``` - diff --git a/pipeline/filters/rewrite-tag.md b/pipeline/filters/rewrite-tag.md index 17574ad2a..72a991c1b 100644 --- a/pipeline/filters/rewrite-tag.md +++ b/pipeline/filters/rewrite-tag.md @@ -59,7 +59,7 @@ If we wanted to match against the value of the key `name` we must use `$name`. T * `$name` = "abc-123" * `$ss['s1']['s2']` = "flb" -Note that a key must point a value that contains a string, it's **not valid** for numbers, booleans, maps or arrays. +Note that a key must point to a value that contains a string, it's **not valid** for numbers, booleans, maps or arrays. ### Regex diff --git a/pipeline/filters/type-converter.md b/pipeline/filters/type-converter.md index b185467f8..d55d5be40 100644 --- a/pipeline/filters/type-converter.md +++ b/pipeline/filters/type-converter.md @@ -2,6 +2,8 @@ The _Type Converter Filter_ plugin allows to convert data type and append new key value pair. +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=8984f540-d95a-462b-8a08-09f72f5fab63" /> + This plugin is useful in combination with plugins which expect incoming string value. e.g. [filter_grep](grep.md), [filter_modify](modify.md) diff --git a/pipeline/filters/wasm.md b/pipeline/filters/wasm.md index 913fe2359..0140bc28d 100644 --- a/pipeline/filters/wasm.md +++ b/pipeline/filters/wasm.md @@ -19,8 +19,11 @@ The plugin supports the following configuration parameters: | Key | Description | | :--- | :--- | | Wasm\_Path | Path to the built Wasm program that will be used. This can be a relative path against the main configuration file. | +| Event\_Format | Define event format to interact with Wasm programs: msgpack or json. 
Default: json | | Function\_Name | Wasm function name that will be triggered to do filtering. It's assumed that the function is built inside the Wasm program specified above. | -| Accessible\_Paths | Specify the whilelist of paths to be able to access paths from WASM programs. | +| Accessible\_Paths | Specify the whitelist of paths to be able to access paths from WASM programs. | +| Wasm\_Heap\_Size | Size of the heap size of Wasm execution. Review [unit sizes](../../administration/configuring-fluent-bit/unit-sizes.md) for allowed values. | +| Wasm\_Stack\_Size | Size of the stack size of Wasm execution. Review [unit sizes](../../administration/configuring-fluent-bit/unit-sizes.md) for allowed values. | ## Configuration Examples <a id="config_example"></a> @@ -34,6 +37,7 @@ Here is a configuration example. [FILTER] Name wasm Match dummy.* + Event_Format json # or msgpack WASM_Path /path/to/wasm_program.wasm Function_Name filter_function_name Accessible_Paths .,/path/to/accessible diff --git a/pipeline/inputs/collectd.md b/pipeline/inputs/collectd.md index acbf773c1..b10ea73fd 100644 --- a/pipeline/inputs/collectd.md +++ b/pipeline/inputs/collectd.md @@ -11,6 +11,7 @@ The plugin supports the following configuration parameters: | Listen | Set the address to listen to | 0.0.0.0 | | Port | Set the port to listen to | 25826 | | TypesDB | Set the data specification file | /usr/share/collectd/types.db | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Configuration Examples <a id="config_example"></a> @@ -31,4 +32,3 @@ Here is a basic configuration example. With this configuration, Fluent Bit listens to `0.0.0.0:25826`, and outputs incoming datagram packets to stdout. You must set the same types.db files that your collectd server uses. Otherwise, Fluent Bit may not be able to interpret the payload properly. - diff --git a/pipeline/inputs/cpu-metrics.md b/pipeline/inputs/cpu-metrics.md index 3c296f0cd..c54558cbf 100644 --- a/pipeline/inputs/cpu-metrics.md +++ b/pipeline/inputs/cpu-metrics.md @@ -4,13 +4,15 @@ The **cpu** input plugin, measures the CPU usage of a process or the whole syste The following tables describes the information generated by the plugin. The keys below represent the data used by the overall system, all values associated to the keys are in a percentage unit \(0 to 100%\): -The CPU metrics plugin creates metrics that are log-based \(I.e. JSON payload\). If you are looking for Prometheus-based metrics please see the Node Exporter Metrics input plugin. +The CPU metrics plugin creates metrics that are log-based, such as JSON payload. For +Prometheus-based metrics, see the Node Exporter Metrics input plugin. | key | description | | :--- | :--- | | cpu\_p | CPU usage of the overall system, this value is the summation of time spent on user and kernel space. The result takes in consideration the numbers of CPU cores in the system. | | user\_p | CPU usage in User mode, for short it means the CPU usage by user space programs. The result of this value takes in consideration the numbers of CPU cores in the system. | | system\_p | CPU usage in Kernel mode, for short it means the CPU usage by the Kernel. The result of this value takes in consideration the numbers of CPU cores in the system. | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. 
| In addition to the keys reported in the above table, a similar content is created **per** CPU core. The cores are listed from _0_ to _N_ as the Kernel reports: diff --git a/pipeline/inputs/disk-io-metrics.md b/pipeline/inputs/disk-io-metrics.md index 024399314..c28cc4acf 100644 --- a/pipeline/inputs/disk-io-metrics.md +++ b/pipeline/inputs/disk-io-metrics.md @@ -2,7 +2,8 @@ The **disk** input plugin, gathers the information about the disk throughput of the running system every certain interval of time and reports them. -The Disk I/O metrics plugin creates metrics that are log-based \(I.e. JSON payload\). If you are looking for Prometheus-based metrics please see the Node Exporter Metrics input plugin. +The Disk I/O metrics plugin creates metrics that are log-based, such as JSON payload. +For Prometheus-based metrics, see the Node Exporter Metrics input plugin. ## Configuration Parameters @@ -13,6 +14,7 @@ The plugin supports the following configuration parameters: | Interval\_Sec | Polling interval \(seconds\). | 1 | | Interval\_NSec | Polling interval \(nanosecond\). | 0 | | Dev\_Name | Device name to limit the target. \(e.g. sda\). If not set, _in\_disk_ gathers information from all of disks and partitions. | all disks | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started @@ -72,4 +74,3 @@ pipeline: Note: Total interval \(sec\) = Interval\_Sec + \(Interval\_Nsec / 1000000000\). e.g. 1.5s = 1s + 500000000ns - diff --git a/pipeline/inputs/docker-events.md b/pipeline/inputs/docker-events.md index 6e850c1ee..40d85ec5c 100644 --- a/pipeline/inputs/docker-events.md +++ b/pipeline/inputs/docker-events.md @@ -14,6 +14,7 @@ This plugin supports the following configuration parameters: | Key | When a message is unstructured \(no parser applied\), it's appended as a string under the key name _message_. | message | | Reconnect.Retry_limits| The maximum number of retries allowed. The plugin tries to reconnect with docker socket when EOF is detected. | 5 | | Reconnect.Retry_interval| The retrying interval. Unit is second. | 1 | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ### Command Line diff --git a/pipeline/inputs/docker-metrics.md b/pipeline/inputs/docker-metrics.md index 1102e576f..23c8d2128 100644 --- a/pipeline/inputs/docker-metrics.md +++ b/pipeline/inputs/docker-metrics.md @@ -6,12 +6,7 @@ description: >- # Docker Metrics -Content: - -* [Configuration Parameters](https://app.gitbook.com/s/-LKKSx-3LBTCtaHbg0gl-887967055/pipeline/inputs/docker.md#configuration-parameters) -* [Configuration File](https://app.gitbook.com/s/-LKKSx-3LBTCtaHbg0gl-887967055/pipeline/inputs/docker.md#configuration-file) - -### Configuration Parameters +## Configuration Parameters The plugin supports the following configuration parameters: @@ -20,10 +15,12 @@ The plugin supports the following configuration parameters: | Interval_Sec | Polling interval in seconds | 1 | | Include | A space-separated list of containers to include | | | Exclude | A space-separated list of containers to exclude | | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | +| path.containers | Used to specify the container directory if Docker is configured with a custom "data-root" directory. 
| `/var/lib/docker/containers` | If you set neither `Include` nor `Exclude`, the plugin will try to get metrics from _all_ the running containers. -### Configuration File +## Configuration File Here is an example configuration that collects metrics from two docker instances (`6bab19c3a0f9` and `14159be4ca2c`). diff --git a/pipeline/inputs/dummy.md b/pipeline/inputs/dummy.md index 745bacaa2..48177ac10 100644 --- a/pipeline/inputs/dummy.md +++ b/pipeline/inputs/dummy.md @@ -6,18 +6,19 @@ The **dummy** input plugin, generates dummy events. It is useful for testing, de The plugin supports the following configuration parameters: -| Key | Description | -| :--- | :--- | -| Dummy | Dummy JSON record. Default: `{"message":"dummy"}` | -| Metadata | Dummy JSON metadata. Default: `{}` | -| Start\_time\_sec | Dummy base timestamp in seconds. Default: 0 | -| Start\_time\_nsec | Dummy base timestamp in nanoseconds. Default: 0 | -| Rate | Rate at which messages are generated expressed in how many times per second. Default: 1 | -| Interval\_sec | Set seconds of time interval at which every message is generated. If set, `Rate` configuration will be ignored. Default: 0 | -| Interval\_nsec | Set nanoseconds of time interval at which every message is generated. If set, `Rate` configuration will be ignored. Default: 0 | -| Samples | If set, the events number will be limited. e.g. If Samples=3, the plugin only generates three events and stops. | -| Copies | Number of messages to generate each time they are generated. Defaults to 1. | -| Flush\_on\_startup | If set to `true`, the first dummy event is generated at startup. Default: `false` | +| Key | Description | Default | +| :----------------- | :---------- | :------ | +| Dummy | Dummy JSON record. | `{"message":"dummy"}` | +| Metadata | Dummy JSON metadata. | `{}` | +| Start\_time\_sec | Dummy base timestamp, in seconds. | `0` | +| Start\_time\_nsec | Dummy base timestamp, in nanoseconds. | `0` | +| Rate | Rate at which messages are generated expressed in how many times per second. | `1` | +| Interval\_sec | Set time interval, in seconds, at which every message is generated. If set, `Rate` configuration is ignored. | `0` | +| Interval\_nsec | Set time interval, in nanoseconds, at which every message is generated. If set, `Rate` configuration is ignored. | `0` | +| Samples | If set, the events number will be limited. For example, if Samples=3, the plugin generates only three events and stops. | _none_ | +| Copies | Number of messages to generate each time they are generated. | `1` | +| Flush\_on\_startup | If set to `true`, the first dummy event is generated at startup. | `false` | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started diff --git a/pipeline/inputs/ebpf.md b/pipeline/inputs/ebpf.md new file mode 100644 index 000000000..767d032f1 --- /dev/null +++ b/pipeline/inputs/ebpf.md @@ -0,0 +1,76 @@ +# `in_ebpf` Input Plugin for Fluent Bit (Experimental) + +> **Note:** This plugin is experimental and may be unstable. Use it in development or testing environments only, as its features and behavior are subject to change. + +The `in_ebpf` input plugin is an **experimental** plugin for Fluent Bit that uses eBPF (extended Berkeley Packet Filter) to capture low-level system events. This plugin allows Fluent Bit to monitor kernel-level activities such as process executions, file accesses, memory allocations, network connections, and signal handling. 
It provides valuable insights into system behavior for debugging, monitoring, and security analysis. + +## Overview + +The `in_ebpf` plugin leverages eBPF to trace kernel events in real-time. By specifying trace points, users can collect targeted system-level metrics and events, which can be particularly useful for gaining visibility into operating system interactions and performance characteristics. + +## System Dependencies + +To enable `in_ebpf`, ensure the following dependencies are installed on your system: +- **Kernel Version**: 4.18 or higher with eBPF support enabled. +- **Required Packages**: + - `bpftool`: Used to manage and debug eBPF programs. + - `libbpf-dev`: Provides the `libbpf` library for loading and interacting with eBPF programs. + - **CMake** 3.13 or higher: Required for building the plugin. + +### Installing Dependencies on Ubuntu +```bash +sudo apt update +sudo apt install libbpf-dev linux-tools-common cmake +``` + +## Building Fluent Bit with `in_ebpf` + +To enable the `in_ebpf` plugin, follow these steps to build Fluent Bit from source: + +1. **Clone the Fluent Bit Repository** +```bash +git clone https://github.com/fluent/fluent-bit.git +cd fluent-bit +``` + +2. **Configure the Build with `in_ebpf`** + +Create a build directory and run `cmake` with the `-DFLB_IN_EBPF=On` flag to enable the `in_ebpf` plugin: +```bash +mkdir build +cd build +cmake .. -DFLB_IN_EBPF=On +``` + +3. **Compile the Source** +```bash +make +``` + +4. **Run Fluent Bit** + +Run Fluent Bit with elevated permissions (e.g., `sudo`), as loading eBPF programs requires root access or appropriate privileges: +```bash +sudo ./bin/fluent-bit -c path/to/your_config.conf +``` + +## Configuration Example + +Here's a basic example of how to configure the plugin: + +``` +[INPUT] + Name ebpf + Trace trace_signal + Trace trace_malloc + Trace trace_bind +``` + +The configuration above enables tracing for: +- Signal handling events (`trace_signal`) +- Memory allocation events (`trace_malloc`) +- Network bind operations (`trace_bind`) + +You can enable multiple traces by adding multiple `Trace` directives in your configuration. +Full list of existing traces can be seen here: [Fluent Bit eBPF Traces](https://github.com/fluent/fluent-bit/tree/master/plugins/in_ebpf/traces) + diff --git a/pipeline/inputs/elasticsearch.md b/pipeline/inputs/elasticsearch.md index 27f659095..08a67d795 100644 --- a/pipeline/inputs/elasticsearch.md +++ b/pipeline/inputs/elasticsearch.md @@ -14,6 +14,7 @@ The plugin supports the following configuration parameters: | meta\_key | Specify a key name for meta information. | "@meta" | | hostname | Specify hostname or FQDN. This parameter can be used for "sniffing" (auto-discovery of) cluster node information. | "localhost" | | version | Specify Elasticsearch server version. This parameter is effective for checking a version of Elasticsearch/OpenSearch server version. | "8.0.0" | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | **Note:** The Elasticsearch cluster uses "sniffing" to optimize the connections between its cluster and clients. Elasticsearch can build its cluster and dynamically generate a connection list which is called "sniffing". 
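For reference, a minimal sketch that exercises the parameters above might look like the following. The tag is arbitrary, the values shown simply restate the documented defaults, and any listener options not covered in the table are left untouched:

```text
[INPUT]
    # Accept Elasticsearch bulk API requests and turn them into log records
    name      elasticsearch
    tag       es_bulk
    meta_key  @meta
    hostname  localhost
    version   8.0.0

[OUTPUT]
    name      stdout
    match     es_bulk
```

The `hostname` and `version` values are what the plugin reports back to clients that sniff the endpoint, as described in the note above.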
diff --git a/pipeline/inputs/exec-wasi.md b/pipeline/inputs/exec-wasi.md index b094f3a01..dabd0a065 100644 --- a/pipeline/inputs/exec-wasi.md +++ b/pipeline/inputs/exec-wasi.md @@ -10,11 +10,14 @@ The plugin supports the following configuration parameters: | :--- | :--- | | WASI\_Path | The place of a WASM program file. | | Parser | Specify the name of a parser to interpret the entry as a structured message. | -| Accessible\_Paths | Specify the whilelist of paths to be able to access paths from WASM programs. | +| Accessible\_Paths | Specify the whitelist of paths to be able to access paths from WASM programs. | | Interval\_Sec | Polling interval \(seconds\). | | Interval\_NSec | Polling interval \(nanosecond\). | -| Buf\_Size | Size of the buffer \(check [unit sizes](https://docs.fluentbit.io/manual/configuration/unit_sizes) for allowed values\) | +| Wasm\_Heap\_Size | Size of the heap size of Wasm execution. Review [unit sizes](../../administration/configuring-fluent-bit/unit-sizes.md) for allowed values. | +| Wasm\_Stack\_Size | Size of the stack size of Wasm execution. Review [unit sizes](../../administration/configuring-fluent-bit/unit-sizes.md) for allowed values. | +| Buf\_Size | Size of the buffer \(check [unit sizes](../../administration/configuring-fluent-bit/unit-sizes.md) for allowed values\) | | Oneshot | Only run once at startup. This allows collection of data precedent to fluent-bit's startup (bool, default: false) | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. | ## Configuration Examples diff --git a/pipeline/inputs/exec.md b/pipeline/inputs/exec.md index 2f7a32416..73b4c3450 100644 --- a/pipeline/inputs/exec.md +++ b/pipeline/inputs/exec.md @@ -21,10 +21,11 @@ The plugin supports the following configuration parameters: | Parser | Specify the name of a parser to interpret the entry as a structured message. | | Interval\_Sec | Polling interval \(seconds\). | | Interval\_NSec | Polling interval \(nanosecond\). | -| Buf\_Size | Size of the buffer \(check [unit sizes](https://docs.fluentbit.io/manual/configuration/unit_sizes) for allowed values\) | +| Buf\_Size | Size of the buffer \(check [unit sizes](../../administration/configuring-fluent-bit/unit-sizes.md) for allowed values\) | | Oneshot | Only run once at startup. This allows collection of data precedent to fluent-bit's startup (bool, default: false) | | Exit\_After\_Oneshot | Exit as soon as the one-shot command exits. This allows the exec plugin to be used as a wrapper for another command, sending the target command's output to any fluent-bit sink(s) then exiting. (bool, default: false) | | Propagate\_Exit\_Code | When exiting due to Exit\_After\_Oneshot, cause fluent-bit to exit with the exit code of the command exited by this plugin. Follows [shell conventions for exit code propagation](https://www.gnu.org/software/bash/manual/html_node/Exit-Status.html). (bool, default: false) | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. 
| ## Getting Started diff --git a/pipeline/inputs/fluentbit-metrics.md b/pipeline/inputs/fluentbit-metrics.md index 9ef8c604e..358ac92b3 100644 --- a/pipeline/inputs/fluentbit-metrics.md +++ b/pipeline/inputs/fluentbit-metrics.md @@ -12,12 +12,13 @@ They can be sent to output plugins including [Prometheus Exporter](../outputs/pr **Important note:** Metrics collected with Node Exporter Metrics flow through a separate pipeline from logs and current filters do not operate on top of metrics. -## Configuration +## Configuration | Key | Description | Default | | --------------- | --------------------------------------------------------------------------------------------------------- | --------- | | scrape_interval | The rate at which metrics are collected from the host operating system | 2 seconds | | scrape_on_start | Scrape metrics upon start, useful to avoid waiting for 'scrape_interval' for the first round of metrics. | false | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started diff --git a/pipeline/inputs/forward.md b/pipeline/inputs/forward.md index 173467b62..f2f02466c 100644 --- a/pipeline/inputs/forward.md +++ b/pipeline/inputs/forward.md @@ -17,6 +17,11 @@ The plugin supports the following configuration parameters: | Buffer\_Chunk\_Size | By default the buffer to store the incoming Forward messages, do not allocate the maximum memory allowed, instead it allocate memory when is required. The rounds of allocations are set by _Buffer\_Chunk\_Size_. The value must be according to the [Unit Size ](../../administration/configuring-fluent-bit/unit-sizes.md)specification. | 1024000 | | Tag_Prefix | Prefix incoming tag with the defined value.| | | Tag | Override the tag of the forwarded events with the defined value.| | +| Shared\_Key | Shared key for secure forward authentication. | | +| Empty\_Shared\_Key | Use this option to connect to Fluentd with a zero-length shared key. | `false` | +| Self\_Hostname | Hostname for secure forward authentication. | | +| Security.Users | Specify the username and password pairs for secure forward authentication. | | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started @@ -74,6 +79,51 @@ pipeline: {% endtab %} {% endtabs %} +## Fluent Bit + Secure Forward Setup + +Since Fluent Bit v3, in\_forward can handle secure forward protocol. + +For using user-password authentication, it needs to specify `security.users` at least an one-pair. +For using shared key, it needs to specify `shared_key` in both of forward output and forward input. +`self_hostname` is not able to specify with the same hostname between fluent servers and clients. 
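+
+A matching client-side configuration is sketched below. The host, credentials,
+and hostnames are placeholders, and the forward *output* options shown
+(`Shared_Key`, `Self_Hostname`, `Username`, `Password`) are assumed to mirror
+the input-side settings; check the forward output plugin page for the
+authoritative list. The tabs that follow show the corresponding server-side
+(input) configuration.
+
+```python
+[OUTPUT]
+    Name           forward
+    Match          *
+    Host           flb.server.local
+    Port           24224
+    Shared_Key     secret
+    Self_Hostname  flb.client.local
+    Username       fluentbit
+    Password       changeme
+```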
+ +{% tabs %} +{% tab title="fluent-bit-secure-forward.conf" %} +```python +[INPUT] + Name forward + Listen 0.0.0.0 + Port 24224 + Buffer_Chunk_Size 1M + Buffer_Max_Size 6M + Security.Users fluentbit changeme + Shared_Key secret + Self_Hostname flb.server.local + +[OUTPUT] + Name stdout + Match * +``` +{% endtab %} + +{% tab title="fluent-bit-secure-forward.yaml" %} +```yaml +pipeline: + inputs: + - name: forward + listen: 0.0.0.0 + port: 24224 + buffer_chunk_size: 1M + buffer_max_size: 6M + security.users: fluentbit changeme + shared_key: secret + self_hostname: flb.server.local + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} ## Testing @@ -94,4 +144,3 @@ Copyright (C) Treasure Data [2016/10/07 21:49:40] [ info] [in_fw] binding 0.0.0.0:24224 [0] my_tag: [1475898594, {"key 1"=>123456789, "key 2"=>"abcdefg"}] ``` - diff --git a/pipeline/inputs/head.md b/pipeline/inputs/head.md index b5f4496b4..6537f0ee2 100644 --- a/pipeline/inputs/head.md +++ b/pipeline/inputs/head.md @@ -16,6 +16,7 @@ The plugin supports the following configuration parameters: | Key | Rename a key. Default: head. | | Lines | Line number to read. If the number N is set, in\_head reads first N lines like head\(1\) -n. | | Split\_line | If enabled, in\_head generates key-value pair per line. | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. | ### Split Line Mode @@ -84,7 +85,7 @@ pipeline: Output is ```bash -$ bin/fluent-bit -c head.conf +$ bin/fluent-bit -c head.conf Fluent Bit v1.x.x * Copyright (C) 2019-2020 The Fluent Bit Authors * Copyright (C) 2015-2018 Treasure Data @@ -162,4 +163,3 @@ pipeline: Note: Total interval \(sec\) = Interval\_Sec + \(Interval\_Nsec / 1000000000\). e.g. 1.5s = 1s + 500000000ns - diff --git a/pipeline/inputs/health.md b/pipeline/inputs/health.md index 0e5694338..51028e735 100644 --- a/pipeline/inputs/health.md +++ b/pipeline/inputs/health.md @@ -15,6 +15,7 @@ The plugin supports the following configuration parameters: | Alert | If enabled, it will only generate messages if the target TCP service is down. By default this option is disabled. | | Add\_Host | If enabled, hostname is appended to each records. Default value is _false_. | | Add\_Port | If enabled, port number is appended to each records. Default value is _false_. | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. | ## Getting Started @@ -87,4 +88,3 @@ Fluent Bit v1.8.0 [2] health.0: [1624145990.306498573, {"alive"=>true}] [3] health.0: [1624145991.305595498, {"alive"=>true}] ``` - diff --git a/pipeline/inputs/http.md b/pipeline/inputs/http.md index c83497ae7..52150a24b 100644 --- a/pipeline/inputs/http.md +++ b/pipeline/inputs/http.md @@ -15,11 +15,16 @@ description: The HTTP input plugin allows you to send custom records to an HTTP | buffer_chunk_size | This sets the chunk size for incoming incoming JSON messages. These chunks are then stored/managed in the space available by buffer_max_size. | 512K | | successful_response_code | It allows to set successful response code. `200`, `201` and `204` are supported. | 201 | | success_header | Add an HTTP header key/value pair on success. Multiple headers can be set. Example: `X-Custom custom-answer` | | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). 
| `false` | ### TLS / SSL HTTP input plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [Transport Security](../../administration/transport-security.md) section. +### gzipped content + +The HTTP input plugin will accept and automatically handle gzipped content as of v2.2.1 as long as the header `Content-Encoding: gzip` is set on the received data. + ## Getting Started The http input plugin allows Fluent Bit to open up an HTTP port that you can then route data to in a dynamic way. This plugin supports dynamic tags which allow you to send data with different tags through the same input. An example video and curl message can be seen below diff --git a/pipeline/inputs/kafka.md b/pipeline/inputs/kafka.md index e779b6f07..a0c83b97d 100644 --- a/pipeline/inputs/kafka.md +++ b/pipeline/inputs/kafka.md @@ -14,8 +14,8 @@ This plugin uses the official [librdkafka C library](https://github.com/edenhill | group\_id | Group id passed to librdkafka. | fluent-bit | | poll\_ms | Kafka brokers polling interval in milliseconds. | 500 | | Buffer\_Max\_Size | Specify the maximum size of buffer per cycle to poll kafka messages from subscribed topics. To increase throughput, specify larger size. | 4M | -| poll\_ms | Kafka brokers polling interval in milliseconds. | 500 | | rdkafka.{property} | `{property}` can be any [librdkafka properties](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) | | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started @@ -46,7 +46,8 @@ In your main configuration file append the following _Input_ & _Output_ sections #### Example of using kafka input/output plugins -The fluent-bit source repository contains a full example of using fluent-bit to process kafka records: +The Fluent Bit source repository contains a full example of using Fluent Bit to +process Kafka records: ```text [INPUT] diff --git a/pipeline/inputs/kernel-logs.md b/pipeline/inputs/kernel-logs.md index 7fa9cf143..9614391a5 100644 --- a/pipeline/inputs/kernel-logs.md +++ b/pipeline/inputs/kernel-logs.md @@ -7,6 +7,7 @@ The **kmsg** input plugin reads the Linux Kernel log buffer since the beginning, | Key | Description | Default | | :--- | :--- | :--- | | Prio_Level | The log level to filter. The kernel log is dropped if its priority is more than prio_level. Allowed values are 0-8. Default is 8. 8 means all logs are saved. | 8 | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started @@ -60,4 +61,3 @@ pipeline: ``` {% endtab %} {% endtabs %} - diff --git a/pipeline/inputs/kubernetes-events.md b/pipeline/inputs/kubernetes-events.md index 164c4eb60..1bfb7d134 100644 --- a/pipeline/inputs/kubernetes-events.md +++ b/pipeline/inputs/kubernetes-events.md @@ -14,8 +14,8 @@ Kubernetes exports it events through the API server. This input plugin allows to |---------------------|---------------------------------------------------------------------------------------|------------------------------------------------------| | db | Set a database file to keep track of recorded Kubernetes events | | | db.sync | Set a database sync method. values: extra, full, normal and off | normal | -| interval_sec | Set the polling interval for each channel. 
| 0 | -| interval_nsec | Set the polling interval for each channel (sub seconds: nanoseconds) | 500000000 | +| interval_sec | Set the reconnect interval (seconds)* | 0 | +| interval_nsec | Set the reconnect interval (sub seconds: nanoseconds)* | 500000000 | | kube_url | API Server end-point | https://kubernetes.default.svc | | kube_ca_file | Kubernetes TLS CA file | /var/run/secrets/kubernetes.io/serviceaccount/ca.crt | | kube_ca_path | Kubernetes TLS ca path | | @@ -28,26 +28,45 @@ Kubernetes exports it events through the API server. This input plugin allows to | tls.verify | Enable or disable verification of TLS peer certificate. | On | | tls.vhost | Set optional TLS virtual host. | | + +- _* As of Fluent-Bit 3.1, this plugin uses a Kubernetes watch stream instead of polling. In versions before 3.1, the interval parameters are used for reconnecting the Kubernetes watch stream._ + +## Threading + +This input always runs in its own [thread](../../administration/multithreading.md#inputs). + ## Getting Started +### Kubernetes Service Account +The Kubernetes service account used by Fluent Bit must have `get`, `list`, and `watch` +permissions to `namespaces` and `pods` for the namespaces watched in the +`kube_namespace` configuration parameter. If you're using the helm chart to configure +Fluent Bit, this role is included. + ### Simple Configuration File In the following configuration file, the input plugin *kubernetes_events* collects events every 5 seconds (default for *interval_nsec*) and exposes them through the [standard output plugin](../outputs/standard-output.md) on the console. ```text [SERVICE] -flush 1 -log_level info + flush 1 + log_level info [INPUT] -name kubernetes_events -tag k8s_events -kube_url https://kubernetes.default.svc + name kubernetes_events + tag k8s_events + kube_url https://kubernetes.default.svc [OUTPUT] -name stdout -match * + name stdout + match * ``` ### Event Timestamp -Event timestamp will be created from the first existing field in the following order of precendence: lastTimestamp, firstTimestamp, metadata.creationTimestamp + +Event timestamps are created from the first existing field, based on the following +order of precedence: + +1. `lastTimestamp` +1. `firstTimestamp` +1. `metadata.creationTimestamp` diff --git a/pipeline/inputs/memory-metrics.md b/pipeline/inputs/memory-metrics.md index 0f380c614..de04a3719 100644 --- a/pipeline/inputs/memory-metrics.md +++ b/pipeline/inputs/memory-metrics.md @@ -23,6 +23,11 @@ Fluent Bit v1.x.x [3] memory: [1488543159, {"Mem.total"=>1016044, "Mem.used"=>841420, "Mem.free"=>174624, "Swap.total"=>2064380, "Swap.used"=>139888, "Swap.free"=>1924492}] ``` +## Threading + +You can enable the `threaded` setting to run this input in its own +[thread](../../administration/multithreading.md#inputs). + ### Configuration File In your main configuration file append the following _Input_ & _Output_ sections: diff --git a/pipeline/inputs/mqtt.md b/pipeline/inputs/mqtt.md index 5ed4295fc..9d67bdbea 100644 --- a/pipeline/inputs/mqtt.md +++ b/pipeline/inputs/mqtt.md @@ -6,11 +6,12 @@ The **MQTT** input plugin, allows to retrieve messages/data from MQTT control pa The plugin supports the following configuration parameters: -| Key | Description | -| :--- | :--- | -| Listen | Listener network interface, default: 0.0.0.0 | -| Port | TCP port where listening for connections, default: 1883 | -| Payload_Key | Specify the key where the payload key/value will be preserved. 
| +| Key | Description | Default | +| :---------- | :------------------------------------------------------------- | :------ | +| Listen | Listener network interface. | `0.0.0.0` | +| Port | TCP port where listening for connections. | `1883` | +| Payload_Key | Specify the key where the payload key/value will be preserved. | _none_ | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started @@ -53,4 +54,3 @@ In your main configuration file append the following _Input_ & _Output_ sections Name stdout Match * ``` - diff --git a/pipeline/inputs/network-io-metrics.md b/pipeline/inputs/network-io-metrics.md index 64fb21f26..bbfbcc5de 100644 --- a/pipeline/inputs/network-io-metrics.md +++ b/pipeline/inputs/network-io-metrics.md @@ -2,7 +2,8 @@ The **netif** input plugin gathers network traffic information of the running system every certain interval of time, and reports them. -The Network I/O Metrics plugin creates metrics that are log-based \(I.e. JSON payload\). If you are looking for Prometheus-based metrics please see the Node Exporter Metrics input plugin. +The Network I/O Metrics plugin creates metrics that are log-based, such as JSON +payload. For Prometheus-based metrics, see the Node Exporter Metrics input plugin. ## Configuration Parameters @@ -15,6 +16,7 @@ The plugin supports the following configuration parameters: | Interval\_NSec | Polling interval \(nanosecond\). | 0 | | Verbose | If true, gather metrics precisely. | false | | Test\_At\_Init | If true, testing if the network interface is valid at initialization. | false | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started @@ -75,4 +77,3 @@ pipeline: Note: Total interval \(sec\) = Interval\_Sec + \(Interval\_Nsec / 1000000000\). e.g. 1.5s = 1s + 500000000ns - diff --git a/pipeline/inputs/nginx.md b/pipeline/inputs/nginx.md index d56fc375b..1735e5ef3 100644 --- a/pipeline/inputs/nginx.md +++ b/pipeline/inputs/nginx.md @@ -12,6 +12,7 @@ The plugin supports the following configuration parameters: | Port | Port of the target nginx service to connect to. | 80 | | Status_URL | The URL of the Stub Status Handler. | /status | | Nginx_Plus | Turn on NGINX plus mode. | true | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started @@ -35,8 +36,8 @@ server { ### Configuration with NGINX Plus REST API -A much more powerful and flexible metrics API is available with NGINX Plus. A path needs to be configured -in NGINX Plus first. +Another metrics API is available with NGINX Plus. You must first configure a path in +NGINX Plus. ``` server { @@ -130,8 +131,9 @@ Fluent Bit v2.x.x ## Exported Metrics -This documentation is copied from the nginx prometheus exporter metrics documentation: -[https://github.com/nginxinc/nginx-prometheus-exporter/blob/master/README.md]. +This documentation is copied from the +[NGINX Prometheus Exporter metrics documentation](https://github.com/nginxinc/nginx-prometheus-exporter/blob/main/README.md) +on GitHub. 
### Common metrics: Name | Type | Description | Labels diff --git a/pipeline/inputs/node-exporter-metrics.md b/pipeline/inputs/node-exporter-metrics.md index 097f1ac9c..2ac3eff5c 100644 --- a/pipeline/inputs/node-exporter-metrics.md +++ b/pipeline/inputs/node-exporter-metrics.md @@ -81,6 +81,10 @@ The following table describes the available collectors as part of this plugin. A | nvme | Exposes nvme statistics from `/proc`. | Linux | v2.2.0 | | processes | Exposes processes statistics from `/proc`. | Linux | v2.2.0 | +## Threading + +This input always runs in its own [thread](../../administration/multithreading.md#inputs). + ## Getting Started ### Simple Configuration File @@ -114,7 +118,7 @@ In the following configuration file, the input plugin _node_exporter_metrics col host 0.0.0.0 port 2021 - + ``` {% endtab %} @@ -201,4 +205,3 @@ docker-compose down Our current plugin implements a sub-set of the available collectors in the original Prometheus Node Exporter, if you would like that we prioritize a specific collector please open a Github issue by using the following template:\ \ \- [in_node_exporter_metrics](https://github.com/fluent/fluent-bit/issues/new?assignees=\&labels=\&template=feature_request.md\&title=in_node_exporter_metrics:%20add%20ABC%20collector) - diff --git a/pipeline/inputs/opentelemetry.md b/pipeline/inputs/opentelemetry.md index 38ab0953d..7bb7d1636 100644 --- a/pipeline/inputs/opentelemetry.md +++ b/pipeline/inputs/opentelemetry.md @@ -4,49 +4,66 @@ description: An input plugin to ingest OTLP Logs, Metrics, and Traces # OpenTelemetry -The OpenTelemetry plugin allows you to ingest telemetry data as per the OTLP specification, from various OpenTelemetry exporters, the OpenTelemetry Collector, or Fluent Bit's OpenTelemetry output plugin. +The OpenTelemetry input plugin allows you to receive data as per the OTLP specification, from various OpenTelemetry exporters, the OpenTelemetry Collector, or Fluent Bit's OpenTelemetry output plugin. + +Our compliant implementation fully supports OTLP/HTTP and OTLP/GRPC. Note that the single `port` configured which defaults to 4318 supports both transports. ## Configuration <a href="#configuration" id="configuration"></a> -| Key | Description | default | -| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------- | -| listen | The address to listen on | 0.0.0.0 | -| port | The port for Fluent Bit to listen on | 4318 | -| tag_key | Specify the key name to overwrite a tag. If set, the tag will be overwritten by a value of the key. | | -| raw_traces | Route trace data as a log message | false | -| buffer_max_size | Specify the maximum buffer size in KB to receive a JSON message. | 4M | -| buffer_chunk_size | This sets the chunk size for incoming incoming JSON messages. These chunks are then stored/managed in the space available by buffer_max_size. | 512K | +| Key | Description | default | +| ----------------- | -----------------------------------------------------------------------------------| ------- | +| listen | The network address to listen. | 0.0.0.0 | +| port | The port for Fluent Bit to listen for incoming connections. Note that as of Fluent Bit v3.0.2 this port is used for both transport OTLP/HTTP and OTLP/GRPC. | 4318 | +| tag | Tag for all the data ingested by this plugin. Important: This will only be used if `tag_from_uri` is set to false. Otherwise, the tag will be created from URI. 
| | +| tag_key | Specify the key name to overwrite a tag. If set, the tag will be overwritten by a value of the key | | +| raw_traces | Route trace data as a log | `false` | +| buffer_max_size | Specify the maximum buffer size in KB/MB/GB to the HTTP payload. | 4M | +| buffer_chunk_size | Initial size and allocation strategy to store the payload (advanced users only) | 512K | |successful_response_code | It allows to set successful response code. `200`, `201` and `204` are supported.| 201 | -| tag_from_uri | If true, tag will be created from uri. e.g. v1_metrics from /v1/metrics . | true | +| tag_from_uri | By default, tag will be created from uri. e.g. v1_metrics from /v1/metrics. This must be set to false if using `tag` | `true` | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | Important note: Raw traces means that any data forwarded to the traces endpoint (`/v1/traces`) will be packed and forwarded as a log message, and will NOT be processed by Fluent Bit. The traces endpoint by default expects a valid protobuf encoded payload, but you can set the `raw_traces` option in case you want to get trace telemetry data to any of Fluent Bit's supported outputs. +### OTLP Transport Protocol Endpoints + +Fluent Bit based on the OTLP desired protocol exposes the following endpoints for data ingestion: + +__OTLP/HTTP__ +- Logs + - `/v1/logs` +- Metrics + - `/v1/metrics` +- Traces + - `/v1/traces` + +__OTLP/GRPC__ + +- Logs + - `/opentelemetry.proto.collector.log.v1.LogService/Export` + - `/opentelemetry.proto.collector.log.v1.LogService/Export` +- Metrics + - `/opentelemetry.proto.collector.metric.v1.MetricService/Export` + - `/opentelemetry.proto.collector.metrics.v1.MetricsService/Export` +- Traces + - `/opentelemetry.proto.collector.trace.v1.TraceService/Export` + - `/opentelemetry.proto.collector.traces.v1.TracesService/Export` + + ## Getting started -The OpenTelemetry plugin currently supports the following telemetry data types: +The OpenTelemetry input plugin supports the following telemetry data types: -| Type | HTTP/JSON | HTTP/Protobuf | -| ----------- | ------------- | --------------- | -| Logs | Stable | Stable | -| Metrics | Unimplemented | Stable | -| Traces | Unimplemented | Stable | +| Type | HTTP1/JSON | HTTP1/Protobuf | HTTP2/GRPC | +| ------- | ---------- | -------------- | ---------- | +| Logs | Stable | Stable | Stable | +| Metrics | Unimplemented | Stable | Stable | +| Traces | Unimplemented | Stable | Stable | A sample config file to get started will look something like the following: {% tabs %} -{% tab title="fluent-bit.conf" %} -``` -[INPUT] - name opentelemetry - listen 127.0.0.1 - port 4318 - -[OUTPUT] - name stdout - match * -``` -{% endtab %} {% tab title="fluent-bit.yaml" %} ```yaml @@ -60,6 +77,20 @@ pipeline: match: '*' ``` {% endtab %} + +{% tab title="fluent-bit.conf" %} +``` +[INPUT] + name opentelemetry + listen 127.0.0.1 + port 4318 + +[OUTPUT] + name stdout + match * +``` +{% endtab %} + {% endtabs %} With the above configuration, Fluent Bit will listen on port `4318` for data. You can now send telemetry data to the endpoints `/v1/metrics`, `/v1/traces`, and `/v1/logs` for metrics, traces, and logs respectively. 
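+
+As a quick smoke test, you can post a minimal OTLP/JSON log record to the logs
+endpoint with `curl`. This is only a sketch: the payload is trimmed to a small
+valid OTLP/JSON structure and the message text is a placeholder.
+
+```bash
+curl -s -X POST http://127.0.0.1:4318/v1/logs \
+  -H "Content-Type: application/json" \
+  -d '{"resourceLogs":[{"scopeLogs":[{"logRecords":[{"body":{"stringValue":"hello from curl"}}]}]}]}'
+```
+
+If the record is accepted, it appears on the standard output plugin configured
+in the example above.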
diff --git a/pipeline/inputs/podman-metrics.md b/pipeline/inputs/podman-metrics.md index fb51e3328..4d6181eb9 100644 --- a/pipeline/inputs/podman-metrics.md +++ b/pipeline/inputs/podman-metrics.md @@ -13,6 +13,7 @@ description: The Podman Metrics input plugin allows you to collect metrics from | path.config | Custom path to podman containers configuration file | /var/lib/containers/storage/overlay-containers/containers.json | | path.sysfs | Custom path to sysfs subsystem directory | /sys/fs/cgroup | | path.procfs | Custom path to proc subsystem directory | /proc | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started diff --git a/pipeline/inputs/process-exporter-metrics.md b/pipeline/inputs/process-exporter-metrics.md index 5c933bce2..f74c0996c 100644 --- a/pipeline/inputs/process-exporter-metrics.md +++ b/pipeline/inputs/process-exporter-metrics.md @@ -42,6 +42,10 @@ macOS does not have the `proc` filesystem so this plugin will not work for it. | thread\_wchan | Exposes thread\_wchan from `/proc`. | | thread | Exposes thread statistics from `/proc`. | +## Threading + +This input always runs in its own [thread](../../administration/multithreading.md#inputs). + ## Getting Started ### Simple Configuration File @@ -83,7 +87,8 @@ curl http://127.0.0.1:2021/metrics ### Container to Collect Host Metrics When deploying Fluent Bit in a container you will need to specify additional settings to ensure that Fluent Bit has access to the process details. -The following `docker` command deploys Fluent Bit with a specific mount path for `procfs` and settings enabled to ensure that Fluent Bit can collect from the host. +The following `docker` command deploys Fluent Bit with a specific mount path for +`procfs` and settings enabled to ensure that Fluent Bit can collect from the host. These are then exposed over port 2021. ``` diff --git a/pipeline/inputs/process.md b/pipeline/inputs/process.md index f4a618466..06ba33913 100644 --- a/pipeline/inputs/process.md +++ b/pipeline/inputs/process.md @@ -1,8 +1,10 @@ # Process Metrics +<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=91b97a84-1cd9-41fb-9189-a4f3b30b6bce" /> _Process_ input plugin allows you to check how healthy a process is. It does so by performing a service check at every certain interval of time specified by the user. -The Process metrics plugin creates metrics that are log-based \(I.e. JSON payload\). If you are looking for Prometheus-based metrics please see the Node Exporter Metrics input plugin. +The Process metrics plugin creates metrics that are log-based, such as JSON payload. +For Prometheus-based metrics, see the Node Exporter Metrics input plugin. ## Configuration Parameters @@ -16,6 +18,7 @@ The plugin supports the following configuration parameters: | Alert | If enabled, it will only generate messages if the target process is down. By default this option is disabled. | | Fd | If enabled, a number of fd is appended to each records. Default value is true. | | Mem | If enabled, memory usage of the process is appended to each records. Default value is true. | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. 
| ## Getting Started @@ -63,4 +66,3 @@ Fluent Bit v1.x.x [2] proc.0: [1485780299, {"alive"=>true, "proc_name"=>"fluent-bit", "pid"=>10964, "mem.VmPeak"=>14740000, "mem.VmSize"=>14740000, "mem.VmLck"=>0, "mem.VmHWM"=>1152000, "mem.VmRSS"=>1148000, "mem.VmData"=>2276000, "mem.VmStk"=>88000, "mem.VmExe"=>1768000, "mem.VmLib"=>2328000, "mem.VmPTE"=>68000, "mem.VmSwap"=>0, "fd"=>18}] [3] proc.0: [1485780300, {"alive"=>true, "proc_name"=>"fluent-bit", "pid"=>10964, "mem.VmPeak"=>14740000, "mem.VmSize"=>14740000, "mem.VmLck"=>0, "mem.VmHWM"=>1152000, "mem.VmRSS"=>1148000, "mem.VmData"=>2276000, "mem.VmStk"=>88000, "mem.VmExe"=>1768000, "mem.VmLib"=>2328000, "mem.VmPTE"=>68000, "mem.VmSwap"=>0, "fd"=>18}] ``` - diff --git a/pipeline/inputs/prometheus-remote-write.md b/pipeline/inputs/prometheus-remote-write.md new file mode 100644 index 000000000..b149977b7 --- /dev/null +++ b/pipeline/inputs/prometheus-remote-write.md @@ -0,0 +1,78 @@ +--- +description: An input plugin to ingest payloads of Prometheus remote write +--- + +# Prometheus Remote Write + +This input plugin allows you to ingest a payload in the Prometheus remote-write format, i.e. a remote write sender can transmit data to Fluent Bit. + +## Configuration + +| Key | Description | default | +| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| listen | The address to listen on | 0.0.0.0 | +| port | The port for Fluent Bit to listen on | 8080 | +| buffer\_max\_size | Specify the maximum buffer size in KB to receive a JSON message. | 4M | +| buffer\_chunk\_size | This sets the chunk size for incoming incoming JSON messages. These chunks are then stored/managed in the space available by buffer_max_size. | 512K | +|successful\_response\_code | It allows to set successful response code. `200`, `201` and `204` are supported.| 201 | +| tag\_from\_uri | If true, tag will be created from uri, e.g. api\_prom\_push from /api/prom/push, and any tag specified in the config will be ignored. If false then a tag must be provided in the config for this input. | true | +| uri | Specify an optional HTTP URI for the target web server listening for prometheus remote write payloads, e.g: /api/prom/push | | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | + + +A sample config file to get started will look something like the following: + + +{% tabs %} +{% tab title="fluent-bit.conf" %} +``` +[INPUT] + name prometheus_remote_write + listen 127.0.0.1 + port 8080 + uri /api/prom/push + +[OUTPUT] + name stdout + match * +``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: prometheus_remote_write + listen: 127.0.0.1 + port: 8080 + uri: /api/prom/push + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} + +With the above configuration, Fluent Bit will listen on port `8080` for data. +You can now send payloads in Prometheus remote write format to the endpoint `/api/prom/push`. + +## Examples + +### Communicate with TLS + +Prometheus Remote Write input plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. 
+ +Communicating with TLS, you will need to use the tls related parameters: + +``` +[INPUT] + Name prometheus_remote_write + Listen 127.0.0.1 + Port 8080 + Uri /api/prom/push + Tls On + tls.crt_file /path/to/certificate.crt + tls.key_file /path/to/certificate.key +``` + +Now, you should be able to send data over TLS to the remote write input. diff --git a/pipeline/inputs/prometheus-scrape-metrics.md b/pipeline/inputs/prometheus-scrape-metrics.md index d068de3ac..5f3305b44 100644 --- a/pipeline/inputs/prometheus-scrape-metrics.md +++ b/pipeline/inputs/prometheus-scrape-metrics.md @@ -12,6 +12,7 @@ The initial release of the Prometheus Scrape metric allows you to collect metric | port | The port of the prometheus metric endpoint that you want to scrape | | | scrape\_interval | The interval to scrape metrics | 10s | | metrics\_path | <p>The metrics URI endpoint, that must start with a forward slash.<br><br>Note: Parameters can also be added to the path by using <code>?</code></p> | /metrics | +| threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Example @@ -20,10 +21,10 @@ If an endpoint exposes Prometheus Metrics we can specify the configuration to sc ``` [INPUT] name prometheus_scrape - host 0.0.0.0 + host 0.0.0.0 port 8201 - tag vault - metrics_path /v1/sys/metrics?format=prometheus + tag vault + metrics_path /v1/sys/metrics?format=prometheus scrape_interval 10s [OUTPUT] @@ -78,6 +79,3 @@ If an endpoint exposes Prometheus Metrics we can specify the configuration to sc 2022-03-26T23:01:29.836663788Z vault_runtime_total_gc_pause_ns = 1917611 2022-03-26T23:01:29.836663788Z vault_runtime_total_gc_runs = 19 ``` - - - diff --git a/pipeline/inputs/random.md b/pipeline/inputs/random.md index 73be22a82..3cb055f17 100644 --- a/pipeline/inputs/random.md +++ b/pipeline/inputs/random.md @@ -11,6 +11,7 @@ The plugin supports the following configuration parameters: | Samples | If set, it will only generate a specific number of samples. By default this value is set to _-1_, which will generate unlimited samples. | | Interval\_Sec | Interval in seconds between samples generation. Default value is _1_. | | Interval\_Nsec | Specify a nanoseconds interval for samples generation, it works in conjunction with the Interval\_Sec configuration key. Default value is _0_. | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. | ## Getting Started @@ -78,4 +79,3 @@ Fluent Bit v1.x.x [3] random.0: [1475893657, {"rand_value"=>1501010137543905482}] [4] random.0: [1475893658, {"rand_value"=>16238242822364375212}] ``` - diff --git a/pipeline/inputs/serial-interface.md b/pipeline/inputs/serial-interface.md index 01c4451a6..9da195bd8 100644 --- a/pipeline/inputs/serial-interface.md +++ b/pipeline/inputs/serial-interface.md @@ -11,6 +11,7 @@ The **serial** input plugin, allows to retrieve messages/data from a _Serial_ in | Min\_Bytes | The serial interface will expect at least _Min\_Bytes_ to be available before to process the message \(default: 1\) | | Separator | Allows to specify a _separator_ string that's used to determinate when a message ends. | | Format | Specify the format of the incoming data stream. The only option available is 'json'. Note that _Format_ and _Separator_ cannot be used at the same time. | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. 
| ## Getting Started @@ -125,4 +126,3 @@ When the module is loaded, it will interconnect the following virtual interfaces /dev/tnt4 <=> /dev/tnt5 /dev/tnt6 <=> /dev/tnt7 ``` - diff --git a/pipeline/inputs/splunk.md b/pipeline/inputs/splunk.md index ae23faebb..38a7fcd75 100644 --- a/pipeline/inputs/splunk.md +++ b/pipeline/inputs/splunk.md @@ -12,7 +12,10 @@ The **splunk** input plugin handles [Splunk HTTP HEC](https://docs.splunk.com/Do | buffer_max_size | Specify the maximum buffer size in KB to receive a JSON message. | 4M | | buffer_chunk_size | This sets the chunk size for incoming incoming JSON messages. These chunks are then stored/managed in the space available by buffer_max_size. | 512K | | successful_response_code | It allows to set successful response code. `200`, `201` and `204` are supported. | 201 | -| splunk\_token | Add an Splunk token for HTTP HEC.` | | +| splunk\_token | Specify a Splunk token for HTTP HEC authentication. If multiple tokens are specified (with commas and no spaces), usage will be divided across each of the tokens. | | +| store\_token\_in\_metadata | Store Splunk HEC tokens in the Fluent Bit metadata. If set false, they will be stored as normal key-value pairs in the record data. | true | +| splunk\_token\_key | Use the specified key for storing the Splunk token for HTTP HEC. This is only effective when `store_token_in_metadata` is false. | @splunk_token | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started diff --git a/pipeline/inputs/standard-input.md b/pipeline/inputs/standard-input.md index 1259efd7f..9715ff685 100644 --- a/pipeline/inputs/standard-input.md +++ b/pipeline/inputs/standard-input.md @@ -204,3 +204,4 @@ The plugin supports the following configuration parameters: | :--- | :--- | :--- | | Buffer\_Size | Set the buffer size to read data. This value is used to increase buffer size. The value must be according to the [Unit Size](../../administration/configuring-fluent-bit/unit-sizes.md) specification. | 16k | | Parser | The name of the parser to invoke instead of the default JSON input parser | | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | diff --git a/pipeline/inputs/statsd.md b/pipeline/inputs/statsd.md index b9081e190..3bf30bb92 100644 --- a/pipeline/inputs/statsd.md +++ b/pipeline/inputs/statsd.md @@ -15,11 +15,21 @@ The plugin supports the following configuration parameters: | :--- | :--- | :--- | | Listen | Listener network interface. | 0.0.0.0 | | Port | UDP port where listening for connections | 8125 | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | +| Metrics | Ingested record will be marked as a metric record rather than a log record. | `off` | + +Note: When enabling `Metrics On`, we will also handle metrics from the DogStatsD protocol and the internal record in Fluent Bit will be handled as a metric type for downstream processing. +[The full format of DogStatsD of metrics](https://docs.datadoghq.com/developers/dogstatsd/datagram_shell/?tab=metrics#the-dogstatsd-protocol) is not supported. 
+Including key-value format of tags as below is supported: +`<METRIC_NAME>:<VALUE>|<TYPE>|@<SAMPLE_RATE>|#<TAG_KEY_1>:<TAG_VALUE_1>` +[Events](https://docs.datadoghq.com/developers/dogstatsd/datagram_shell/?tab=events#the-dogstatsd-protocol) and [ServiceChecks](https://docs.datadoghq.com/developers/dogstatsd/datagram_shell/?tab=servicechecks#the-dogstatsd-protocol) formats are not supported yet with `Metrics On`. ## Configuration Examples <a id="config_example"></a> Here is a configuration example. +{% tabs %} +{% tab title="fluent-bit.conf" %} ```python [INPUT] Name statsd @@ -30,6 +40,21 @@ Here is a configuration example. Name stdout Match * ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: statsd + listen: 0.0.0.0 + port: 8125 + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} Now you can input metrics through the UDP port as follows: @@ -45,3 +70,52 @@ Fluent Bit will produce the following records: [0] statsd.0: [1574905141.863344517, {"type"=>"gauge", "bucket"=>"active", "value"=>99.000000, "incremental"=>0}] ``` +## Metrics Setup + +Here is a configuration example for metrics setup. + +{% tabs %} +{% tab title="fluent-bit.conf" %} +```python +[INPUT] + Name statsd + Listen 0.0.0.0 + Port 8125 + Metrics On + +[OUTPUT] + Name stdout + Match * +``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: statsd + listen: 0.0.0.0 + port: 8125 + metrics: On + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} + +Now you can input metrics as metrics type of events through the UDP port as follows: + +```bash +echo "click:+10|c|@0.01|#hello:tag" | nc -q0 -u 127.0.0.1 8125 +echo "active:+99|g|@0.01" | nc -q0 -u 127.0.0.1 8125 +echo "inactive:29|g|@0.0125|#hi:from_fluent-bit" | nc -q0 -u 127.0.0.1 8125 +``` + +Fluent Bit will procude the following metrics events: + +``` +2025-01-09T11:40:26.562424694Z click{incremental="true",hello="tag"} = 1000 +2025-01-09T11:40:28.591477424Z active{incremental="true"} = 9900 +2025-01-09T11:40:31.593118033Z inactive{hi="from_fluent-bit"} = 2320 +``` diff --git a/pipeline/inputs/syslog.md b/pipeline/inputs/syslog.md index 1fe97fe67..af5471a85 100644 --- a/pipeline/inputs/syslog.md +++ b/pipeline/inputs/syslog.md @@ -18,6 +18,7 @@ The plugin supports the following configuration parameters: | Buffer\_Max\_Size | Specify the maximum buffer size to receive a Syslog message. If not set, the default size will be the value of _Buffer\_Chunk\_Size_. | | | Receive\_Buffer\_Size | Specify the maximum socket receive buffer size. If not set, the default value is OS-dependant, but generally too low to accept thousands of syslog messages per second without loss on _udp_ or _unix\_udp_ sockets. Note that on Linux the value is capped by `sysctl net.core.rmem_max`.| | | Source\_Address\_Key| Specify the key where the source address will be injected. | | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). 
| `false` | ### Considerations @@ -42,6 +43,8 @@ By default the service will create and listen for Syslog messages on the unix so In your main configuration file append the following _Input_ & _Output_ sections: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```python [SERVICE] Flush 1 @@ -59,6 +62,27 @@ In your main configuration file append the following _Input_ & _Output_ sections Name stdout Match * ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +service: + flush: 1 + log_level: info + parsers_file: parsers.conf +pipeline: + inputs: + - name: syslog + path: /tmp/in_syslog + buffer_chunk_size: 32000 + buffer_max_size: 64000 + receive_buffer_size: 512000 + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} ### Testing @@ -90,8 +114,10 @@ The following content aims to provide configuration examples for different use c #### Fluent Bit Configuration -Put the following content in your fluent-bit.conf file: +Put the following content in your configuration file: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```text [SERVICE] Flush 1 @@ -108,6 +134,26 @@ Put the following content in your fluent-bit.conf file: Name stdout Match * ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +service: + flush: 1 + parsers_file: parsers.conf +pipeline: + inputs: + - name: syslog + parser: syslog-rfc3164 + listen: 0.0.0.0 + port: 5140 + mode: tcp + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} then start Fluent Bit. @@ -131,6 +177,8 @@ $ sudo service rsyslog restart Put the following content in your fluent-bit.conf file: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```text [SERVICE] Flush 1 @@ -147,6 +195,26 @@ Put the following content in your fluent-bit.conf file: Name stdout Match * ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +service: + flush: 1 + parsers_file: parsers.conf +pipeline: + inputs: + - name: syslog + parser: syslog-rfc3164 + path: /tmp/fluent-bit.sock + mode: unix_udp + unix_perm: 0644 + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} then start Fluent Bit. @@ -161,4 +229,3 @@ $OMUxSockSocket /tmp/fluent-bit.sock ``` Make sure that the socket file is readable by rsyslog \(tweak the `Unix_Perm` option shown above\). - diff --git a/pipeline/inputs/systemd.md b/pipeline/inputs/systemd.md index b48554d2a..36066b090 100644 --- a/pipeline/inputs/systemd.md +++ b/pipeline/inputs/systemd.md @@ -19,6 +19,7 @@ The plugin supports the following configuration parameters: | Read\_From\_Tail | Start reading new entries. Skip entries already stored in Journald. | Off | | Lowercase | Lowercase the Journald field \(key\). | Off | | Strip\_Underscores | Remove the leading underscore of the Journald field \(key\). For example the Journald field _\_PID_ becomes the key _PID_. | Off | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` | ## Getting Started diff --git a/pipeline/inputs/tail.md b/pipeline/inputs/tail.md index fec3c2b47..ad38799c7 100644 --- a/pipeline/inputs/tail.md +++ b/pipeline/inputs/tail.md @@ -26,15 +26,17 @@ The plugin supports the following configuration parameters: | DB.sync | Set a default synchronization \(I/O\) method. Values: Extra, Full, Normal, Off. This flag affects how the internal SQLite engine do synchronization to disk, for more details about each option please refer to [this section](https://www.sqlite.org/pragma.html#pragma_synchronous). 
Most of workload scenarios will be fine with `normal` mode, but if you really need full synchronization after every write operation you should set `full` mode. Note that `full` has a high I/O performance cost. | normal | | DB.locking | Specify that the database will be accessed only by Fluent Bit. Enabling this feature helps to increase performance when accessing the database but it restrict any external tool to query the content. | false | | DB.journal\_mode | sets the journal mode for databases \(WAL\). Enabling WAL provides higher performance. Note that WAL is not compatible with shared network file systems. | WAL | +| DB.compare_filename | This option determines whether to check both the `inode` and the `filename` when retrieving stored file information from the database. 'true' verifies both the `inode` and `filename`, while 'false' checks only the `inode` (default). To check the inode and filename in the database, refer [here](#keep_state). | false | | Mem\_Buf\_Limit | Set a limit of memory that Tail plugin can use when appending data to the Engine. If the limit is reach, it will be paused; when the data is flushed it resumes. | | | Exit\_On\_Eof | When reading a file will exit as soon as it reach the end of the file. Useful for bulk load and tests | false | | Parser | Specify the name of a parser to interpret the entry as a structured message. | | | Key | When a message is unstructured \(no parser applied\), it's appended as a string under the key name _log_. This option allows to define an alternative name for that key. | log | | Inotify_Watcher | Set to false to use file stat watcher instead of inotify. | true | -| Tag | Set a tag \(with regex-extract fields\) that will be placed on lines read. E.g. `kube.<namespace_name>.<pod_name>.<container_name>`. Note that "tag expansion" is supported: if the tag includes an asterisk \(\*\), that asterisk will be replaced with the absolute path of the monitored file \(also see [Workflow of Tail + Kubernetes Filter](../filters/kubernetes.md#workflow-of-tail-kubernetes-filter)\). | | -| Tag\_Regex | Set a regex to extract fields from the file name. E.g. `(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-` | | +| Tag | Set a tag \(with regex-extract fields\) that will be placed on lines read. E.g. `kube.<namespace_name>.<pod_name>.<container_name>.<container_id>`. Note that "tag expansion" is supported: if the tag includes an asterisk \(\*\), that asterisk will be replaced with the absolute path of the monitored file, with slashes replaced by dots \(also see [Workflow of Tail + Kubernetes Filter](../filters/kubernetes.md#workflow-of-tail--kubernetes-filter)\). | | +| Tag\_Regex | Set a regex to extract fields from the file name. E.g. `(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<container_id>[a-z0-9]{64})\.log$` | | | Static\_Batch\_Size | Set the maximum number of bytes to process per iteration for the monitored static files (files that already exists upon Fluent Bit start). | 50M | - +| File\_Cache\_Advise | Set the posix_fadvise in POSIX_FADV_DONTNEED mode. This will reduce the usage of the kernel file cache. This option is ignored if not running on Linux. | On | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). 
| `false` | Note that if the database parameter `DB` is **not** specified, by default the plugin will start reading each target file from the beginning. This also might cause some unwanted behavior, for example when a line is bigger that `Buffer_Chunk_Size` and `Skip_Long_Lines` is not turned on, the file will be read from the beginning of each `Refresh_Interval` until the file is rotated. @@ -80,14 +82,18 @@ If you are running Fluent Bit to process logs coming from containers like Docker ```yaml pipeline: inputs: - - tail: + - name: tail path: /var/log/containers/*.log multiline.parser: docker, cri ``` {% endtab %} {% endtabs %} -The two options separated by a comma means multi-format: try `docker` and `cri` multiline formats. +The two options separated by a comma mean Fluent Bit will try each parser in the list in order, applying the first one that matches the log. + +It will use the first parser which has a `start_state` that matches the log. + +For example, it will first try `docker`, and if `docker` does not match, it will then try `cri`. We are **still working** on extending support to do multiline for nested stack traces and such. Over the Fluent Bit v1.8.x release cycle we will be updating the documentation. @@ -126,14 +132,14 @@ $ fluent-bit -i tail -p path=/var/log/syslog -o stdout ### Configuration File -In your main configuration file append the following _Input_ & _Output_ sections. +In your main configuration file, append the following `Input` and `Output` sections: {% tabs %} {% tab title="fluent-bit.conf" %} -```python +```text [INPUT] - Name tail - Path /var/log/syslog + Name tail + Path /var/log/syslog [OUTPUT] Name stdout @@ -145,9 +151,9 @@ In your main configuration file append the following _Input_ & _Output_ sections ```yaml pipeline: inputs: - - tail: + - name: tail path: /var/log/syslog - + outputs: - stdout: match: * @@ -174,6 +180,9 @@ We need to specify a `Parser_Firstline` parameter that matches the first line of In the case above we can use the following parser, that extracts the Time as `time` and the remaining portion of the multiline as `log` + +{% tabs %} +{% tab title="fluent-bit.conf" %} ```text [PARSER] Name multiline @@ -182,9 +191,24 @@ In the case above we can use the following parser, that extracts the Time as `ti Time_Key time Time_Format %b %d %H:%M:%S ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +parsers: + - name: multiline + format: regex + regex: '/(?<time>[A-Za-z]+ \d+ \d+\:\d+\:\d+)(?<message>.*)/' + time_key: time + time_format: '%b %d %H:%M:%S' +``` +{% endtab %} +{% endtabs %} If we want to further parse the entire event we can add additional parsers with `Parser_N` where N is an integer. The final Fluent Bit configuration looks like the following: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```text # Note this is generally added to parsers.conf and referenced in [SERVICE] [PARSER] @@ -204,6 +228,31 @@ If we want to further parse the entire event we can add additional parsers with Name stdout Match * ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +parsers: + - name: multiline + format: regex + regex: '/(?<time>[A-Za-z]+ \d+ \d+\:\d+\:\d+)(?<message>.*)/' + time_key: time + time_format: '%b %d %H:%M:%S' + +pipeline: + inputs: + - name: tail + multiline: on + read_from_head: true + parser_firstline: multiline + path: /var/log/java.log + + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} Our output will be as follows. 
@@ -256,12 +305,26 @@ Fluent Bit keep the state or checkpoint of each file through using a SQLite data The SQLite journaling mode enabled is `Write Ahead Log` or `WAL`. This allows to improve performance of read and write operations to disk. When enabled, you will see in your file system additional files being created, consider the following configuration statement: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```text [INPUT] name tail path /var/log/containers/*.log db test.db ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: tail + path: /var/log/containers/*.log + db: test.db +``` +{% endtab %} +{% endtabs %} The above configuration enables a database file called `test.db` and in the same path for that file SQLite will create two additional files: @@ -278,4 +341,4 @@ The `WAL` mechanism give us higher performance but also might increase the memor File rotation is properly handled, including logrotate's _copytruncate_ mode. -Note that the `Path` patterns **cannot** match the rotated files. Otherwise, the rotated file would be read again and lead to duplicate records. +Note that the `Path` patterns **cannot** match the rotated files. Otherwise, the rotated file would be read again and lead to duplicate records. \ No newline at end of file diff --git a/pipeline/inputs/tcp.md b/pipeline/inputs/tcp.md index c7a01fd3d..67dba0eb5 100644 --- a/pipeline/inputs/tcp.md +++ b/pipeline/inputs/tcp.md @@ -6,14 +6,16 @@ The **tcp** input plugin allows to retrieve structured JSON or raw messages over The plugin supports the following configuration parameters: -| Key | Description | Default | -| ------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- | +| Key | Description | Default | +| ------------ | ----------- | ------- | | Listen | Listener network interface. | 0.0.0.0 | | Port | TCP port where listening for connections | 5170 | | Buffer\_Size | Specify the maximum buffer size in KB to receive a JSON message. If not set, the default size will be the value of _Chunk\_Size_. | | | Chunk\_Size | By default the buffer to store the incoming JSON messages, do not allocate the maximum memory allowed, instead it allocate memory when is required. The rounds of allocations are set by _Chunk\_Size_ in KB. If not set, _Chunk\_Size_ is equal to 32 (32KB). | 32 | | Format | Specify the expected payload format. It support the options _json_ and _none_. When using _json_, it expects JSON maps, when is set to _none_, it will split every record using the defined _Separator_ (option below). | json | | Separator | When the expected _Format_ is set to _none_, Fluent Bit needs a separator string to split the records. By default it uses the breakline character (LF or 0x10). | | +| Source\_Address\_Key| Specify the key where the source address will be injected. | | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). 
| `false` | ## Getting Started @@ -39,6 +41,8 @@ In the example the JSON messages will only arrive through network interface unde In your main configuration file append the following _Input_ & _Output_ sections: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```python [INPUT] Name tcp @@ -52,6 +56,24 @@ In your main configuration file append the following _Input_ & _Output_ sections Name stdout Match * ``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: tcp + listen: 0.0.0.0 + port: 5170 + chunk_size: 32 + buffer_size: 64 + format: json + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} ## Testing diff --git a/pipeline/inputs/thermal.md b/pipeline/inputs/thermal.md index 7453967c0..56af07975 100644 --- a/pipeline/inputs/thermal.md +++ b/pipeline/inputs/thermal.md @@ -20,6 +20,7 @@ The plugin supports the following configuration parameters: | Interval\_NSec | Polling interval \(nanoseconds\). default: 0 | | name\_regex | Optional name filter regex. default: None | | type\_regex | Optional type filter regex. default: None | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). Default: `false`. | ## Getting Started @@ -59,6 +60,8 @@ Copyright (C) Treasure Data In your main configuration file append the following _Input_ & _Output_ sections: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```python [INPUT] Name thermal @@ -68,4 +71,17 @@ In your main configuration file append the following _Input_ & _Output_ sections Name stdout Match * ``` - +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +pipeline: + inputs: + - name: thermal + tag: my_thermal + outputs: + - name: stdout + match: '*' +``` +{% endtab %} +{% endtabs %} diff --git a/pipeline/inputs/udp.md b/pipeline/inputs/udp.md index 5a4e12d5f..e95faf9f2 100644 --- a/pipeline/inputs/udp.md +++ b/pipeline/inputs/udp.md @@ -15,6 +15,7 @@ The plugin supports the following configuration parameters: | Format | Specify the expected payload format. It support the options _json_ and _none_. When using _json_, it expects JSON maps, when is set to _none_, it will split every record using the defined _Separator_ (option below). | json | | Separator | When the expected _Format_ is set to _none_, Fluent Bit needs a separator string to split the records. By default it uses the breakline character (LF or 0x10). | | | Source\_Address\_Key| Specify the key where the source address will be injected. | | +| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). 
| `false` |
 
 ## Getting Started
 
@@ -40,6 +41,8 @@ In the example the JSON messages will only arrive through network interface unde
 
 In your main configuration file append the following _Input_ & _Output_ sections:
 
+{% tabs %}
+{% tab title="fluent-bit.conf" %}
 ```python
 [INPUT]
     Name          udp
@@ -53,6 +56,24 @@ In your main configuration file append the following _Input_ & _Output_ sections
     Name   stdout
     Match  *
 ```
+{% endtab %}
+
+{% tab title="fluent-bit.yaml" %}
+```yaml
+pipeline:
+  inputs:
+    - name: udp
+      listen: 0.0.0.0
+      port: 5170
+      chunk_size: 32
+      buffer_size: 64
+      format: json
+  outputs:
+    - name: stdout
+      match: '*'
+```
+{% endtab %}
+{% endtabs %}
 
 ## Testing
 
diff --git a/pipeline/inputs/windows-event-log-winevtlog.md b/pipeline/inputs/windows-event-log-winevtlog.md
index f55248945..a572f8771 100644
--- a/pipeline/inputs/windows-event-log-winevtlog.md
+++ b/pipeline/inputs/windows-event-log-winevtlog.md
@@ -15,8 +15,11 @@ The plugin supports the following configuration parameters:
 | DB | Set the path to save the read offsets. \(optional\) | |
 | String\_Inserts | Whether to include StringInserts in output records. \(optional\) | True |
 | Render\_Event\_As\_XML | Whether to render system part of event as XML string or not. \(optional\) | False |
+| Ignore\_Missing\_Channels | Whether to ignore event channels not present in the event log, and continue running with subscribed channels. \(optional\) | False |
 | Use\_ANSI | Use ANSI encoding on eventlog messages. If you have issues receiving blank strings with old Windows versions (Server 2012 R2), setting this to True may solve the problem. \(optional\) | False |
 | Event\_Query | Specify XML query for filtering events. | `*` |
+| Read\_Limit\_Per\_Cycle | Specify read limit per cycle. | 512KiB |
+| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` |
 
 Note that if you do not set _db_, the plugin will tail channels on each startup.
 
@@ -40,6 +43,10 @@ Here is a minimum configuration example.
 
 Note that some Windows Event Log channels \(like `Security`\) requires an admin privilege for reading. In this case, you need to run fluent-bit as an administrator.
 
+The default value of Read\_Limit\_Per\_Cycle is 512KiB.
+Note that 512KiB (512 x 1024 = 524,288 bytes) is not equal to 512KB (512 x 1000 = 512,000 bytes).
+To increase the number of events this plugin reads per second, specify a value larger than 512KiB.
+
 #### Query Languages for Event_Query Parameter
 
 The `Event_Query` parameter can be used to specify the XML query for filtering Windows EventLog during collection.
diff --git a/pipeline/inputs/windows-event-log.md b/pipeline/inputs/windows-event-log.md
index 3cac1ccdb..4a6941a40 100644
--- a/pipeline/inputs/windows-event-log.md
+++ b/pipeline/inputs/windows-event-log.md
@@ -11,6 +11,7 @@ The plugin supports the following configuration parameters:
 | Channels | A comma-separated list of channels to read from. | |
 | Interval_Sec | Set the polling interval for each channel. (optional) | 1 |
 | DB | Set the path to save the read offsets. (optional) | |
+| Threaded | Indicates whether to run this input in its own [thread](../../administration/multithreading.md#inputs). | `false` |
 
 Note that if you do not set _db_, the plugin will read channels from the beginning on each startup.
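+
+As a minimal sketch of how these parameters fit together (assuming the `winlog`
+input name this page documents; the channel names and database path below are
+illustrative placeholders, not defaults):
+
+```text
+[INPUT]
+    # Example channels; replace with the channels you need to read
+    Name         winlog
+    Channels     Setup,Windows PowerShell
+    Interval_Sec 1
+    DB           winlog.sqlite
+
+[OUTPUT]
+    Name   stdout
+    Match  *
+```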
diff --git a/pipeline/inputs/windows-exporter-metrics.md b/pipeline/inputs/windows-exporter-metrics.md index 61713457e..f1b5ed126 100644 --- a/pipeline/inputs/windows-exporter-metrics.md +++ b/pipeline/inputs/windows-exporter-metrics.md @@ -63,6 +63,10 @@ The following table describes the available collectors as part of this plugin. A | paging\_file | Exposes paging\_file statistics. | Windows | v2.1.9 | | process | Exposes process statistics. | Windows | v2.1.9 | +## Threading + +This input always runs in its own [thread](../../administration/multithreading.md#inputs). + ## Getting Started ### Simple Configuration File diff --git a/pipeline/outputs/azure.md b/pipeline/outputs/azure.md index eda87d29d..3e4bf7b04 100644 --- a/pipeline/outputs/azure.md +++ b/pipeline/outputs/azure.md @@ -20,6 +20,7 @@ To get more details about how to setup Azure Log Analytics, please refer to the | Log_Type_Key | If included, the value for this key will be looked upon in the record and if present, will over-write the `log_type`. If not found then the `log_type` value will be used. | | | Time\_Key | Optional parameter to specify the key name where the timestamp will be stored. | @timestamp | | Time\_Generated | If enabled, the HTTP request header 'time-generated-field' will be included so Azure can override the timestamp with the key specified by 'time_key' option. | off | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ## Getting Started @@ -61,4 +62,3 @@ Another example using the `Log_Type_Key` with [record-accessor](https://docs.flu Customer_ID abc Shared_Key def ``` - diff --git a/pipeline/outputs/azure_blob.md b/pipeline/outputs/azure_blob.md index c775379aa..1c23806ff 100644 --- a/pipeline/outputs/azure_blob.md +++ b/pipeline/outputs/azure_blob.md @@ -31,6 +31,7 @@ We expose different configuration properties. The following table lists all the | emulator\_mode | If you want to send data to an Azure emulator service like [Azurite](https://github.com/Azure/Azurite), enable this option so the plugin will format the requests to the expected format. | off | | endpoint | If you are using an emulator, this option allows you to specify the absolute HTTP address of such service. e.g: [http://127.0.0.1:10000](http://127.0.0.1:10000). | | | tls | Enable or disable TLS encryption. Note that Azure service requires this to be turned on. | off | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. 
| `0` |
 
 ## Getting Started
 
@@ -128,4 +129,3 @@ Azurite Queue service is successfully listening at http://127.0.0.1:10001
 127.0.0.1 - - [03/Sep/2020:17:40:03 +0000] "PUT /devstoreaccount1/logs/kubernetes/var.log.containers.app-default-96cbdef2340.log HTTP/1.1" 201 -
 127.0.0.1 - - [03/Sep/2020:17:40:04 +0000] "PUT /devstoreaccount1/logs/kubernetes/var.log.containers.app-default-96cbdef2340.log?comp=appendblock HTTP/1.1" 201 -
 ```
-
diff --git a/pipeline/outputs/azure_kusto.md b/pipeline/outputs/azure_kusto.md
index dcdca0d38..b052611aa 100644
--- a/pipeline/outputs/azure_kusto.md
+++ b/pipeline/outputs/azure_kusto.md
@@ -4,15 +4,23 @@ description: Send logs to Azure Data Explorer (Kusto)
 
 # Azure Data Explorer (Kusto)
 
-The Kusto output plugin allows to ingest your logs into an [Azure Data Explorer](https://azure.microsoft.com/en-us/services/data-explorer/) cluster, via the [Queued Ingestion](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/api/netfx/about-kusto-ingest#queued-ingestion) mechanism.
+The Kusto output plugin lets you ingest your logs into an [Azure Data Explorer](https://azure.microsoft.com/en-us/services/data-explorer/) cluster, via the [Queued Ingestion](https://docs.microsoft.com/en-us/azure/data-explorer/kusto/api/netfx/about-kusto-ingest#queued-ingestion) mechanism. This output plugin can also be used to ingest logs into an [Eventhouse](https://blog.fabric.microsoft.com/en-us/blog/eventhouse-overview-handling-real-time-data-with-microsoft-fabric/) cluster in Microsoft Fabric Real Time Analytics.
 
-## Creating a Kusto Cluster and Database
+## For ingesting into Azure Data Explorer: Creating a Kusto Cluster and Database
 
 You can create an Azure Data Explorer cluster in one of the following ways:
 
 - [Create a free-tier cluster](https://dataexplorer.azure.com/freecluster)
 - [Create a fully-featured cluster](https://docs.microsoft.com/en-us/azure/data-explorer/create-cluster-database-portal)
 
+## For ingesting into Microsoft Fabric Real Time Analytics: Creating an Eventhouse Cluster and KQL Database
+
+You can create an Eventhouse cluster and a KQL database by following these steps:
+
+- [Create an Eventhouse cluster](https://docs.microsoft.com/en-us/azure/data-explorer/eventhouse/create-eventhouse-cluster)
+- [Create a KQL database](https://docs.microsoft.com/en-us/azure/data-explorer/eventhouse/create-database)
+
+
 ## Creating an Azure Registered Application
 
 Fluent-Bit will use the application's credentials, to ingest data into your cluster.
@@ -43,9 +51,10 @@ By default, Kusto will insert incoming ingestions into a table by inferring the
 
 | Key | Description | Default |
 | --------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- |
-| tenant_id | _Required_ - The tenant/domain ID of the AAD registered application. | |
-| client_id | _Required_ - The client ID of the AAD registered application. | |
-| client_secret | _Required_ - The client secret of the AAD registered application ([App Secret](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#option-2-create-a-new-application-secret)). |
+| tenant_id | _Required if `managed_identity_client_id` is not set_ - The tenant/domain ID of the AAD registered application. 
| | +| client_id | _Required if `managed_identity_client_id` is not set_ - The client ID of the AAD registered application. | | +| client_secret | _Required if `managed_identity_client_id` is not set_ - The client secret of the AAD registered application ([App Secret](https://docs.microsoft.com/en-us/azure/active-directory/develop/howto-create-service-principal-portal#option-2-create-a-new-application-secret)). | +| managed_identity_client_id | _Required if `tenant_id`, `client_id`, and `client_secret` are not set_ - The managed identity ID to authenticate with. Set to `SYSTEM` for system-assigned managed identity, or set to the MI client ID (GUID) for user-assigned managed identity. | | | ingestion_endpoint | _Required_ - The cluster's ingestion endpoint, usually in the form `https://ingest-cluster_name.region.kusto.windows.net | | database_name | _Required_ - The database name. | | | table_name | _Required_ - The table name. | | @@ -55,6 +64,10 @@ By default, Kusto will insert incoming ingestions into a table by inferring the | tag_key | The key name of tag. If `include_tag_key` is false, This property is ignored. | `tag` | | include_time_key | If enabled, a timestamp is appended to output. The key name is used `time_key` property. | `On` | | time_key | The key name of time. If `include_time_key` is false, This property is ignored. | `timestamp` | +| ingestion_endpoint_connect_timeout | The connection timeout of various Kusto endpoints in seconds. | `60` | +| compression_enabled | If enabled, sends compressed HTTP payload (gzip) to Kusto. | `true` | +| ingestion_resources_refresh_interval | The ingestion resources refresh interval of Kusto endpoint in seconds. | `3600` | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ### Configuration File @@ -62,15 +75,19 @@ Get started quickly with this configuration file: ``` [OUTPUT] - Match * - Name azure_kusto - Tenant_Id <app_tenant_id> - Client_Id <app_client_id> - Client_Secret <app_secret> - Ingestion_Endpoint https://ingest-<cluster>.<region>.kusto.windows.net - Database_Name <database_name> - Table_Name <table_name> - Ingestion_Mapping_Reference <mapping_name> + match * + name azure_kusto + tenant_id <app_tenant_id> + client_id <app_client_id> + client_secret <app_secret> + ingestion_endpoint https://ingest-<cluster>.<region>.kusto.windows.net + database_name <database_name> + table_name <table_name> + ingestion_mapping_reference <mapping_name> + ingestion_endpoint_connect_timeout <ingestion_endpoint_connect_timeout> + compression_enabled <compression_enabled> + ingestion_resources_refresh_interval <ingestion_resources_refresh_interval> + ``` ## Troubleshooting diff --git a/pipeline/outputs/azure_logs_ingestion.md b/pipeline/outputs/azure_logs_ingestion.md index e008ac4da..dbf7678b9 100644 --- a/pipeline/outputs/azure_logs_ingestion.md +++ b/pipeline/outputs/azure_logs_ingestion.md @@ -37,6 +37,7 @@ To get more details about how to setup these components, please refer to the fol | time\_key | _Optional_ - Specify the key name where the timestamp will be stored. | `@timestamp` | | time\_generated | _Optional_ - If enabled, will generate a timestamp and append it to JSON. The key name is set by the 'time_key' parameter. | `true` | | compress | _Optional_ - Enable HTTP payload gzip compression. | `true` | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. 
| `0` | ## Getting Started @@ -58,7 +59,7 @@ Use this configuration to quickly get started: Name tail Path /path/to/your/sample.log Tag sample - Key RawData + Key RawData # Or use other plugins Plugin # [INPUT] # Name cpu diff --git a/pipeline/outputs/bigquery.md b/pipeline/outputs/bigquery.md index 8ef7a469f..dd2c278a9 100644 --- a/pipeline/outputs/bigquery.md +++ b/pipeline/outputs/bigquery.md @@ -59,6 +59,7 @@ You must configure workload identity federation in GCP before using it with Flue | pool\_id | GCP workload identity pool where the identity provider was created. Used to construct the full resource name of the identity provider. | | | provider\_id | GCP workload identity provider. Used to construct the full resource name of the identity provider. Currently only AWS accounts are supported. | | | google\_service\_account | Email address of the Google service account to impersonate. The workload identity provider must have permissions to impersonate this service account, and the service account must have permissions to access Google BigQuery resources (e.g. `write` access to tables) | | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | See Google's [official documentation](https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll) for further details. @@ -77,4 +78,3 @@ If you are using a _Google Cloud Credentials File_, the following configuration dataset_id my_dataset table_id dummy_table ``` - diff --git a/pipeline/outputs/chronicle.md b/pipeline/outputs/chronicle.md index d2935fc00..5298ec584 100644 --- a/pipeline/outputs/chronicle.md +++ b/pipeline/outputs/chronicle.md @@ -1,5 +1,3 @@ ---- - # Chronicle The Chronicle output plugin allows ingesting security logs into [Google Chronicle](https://chronicle.security/) service. This connector is designed to send unstructured security logs. @@ -36,6 +34,7 @@ Fluent Bit's Chronicle output plugin uses a JSON credentials file for authentica | log\_type | The log type to parse logs as. Google Chronicle supports parsing for [specific log types only](https://cloud.google.com/chronicle/docs/ingestion/parser-list/supported-default-parsers). | | | region | The GCP region in which to store security logs. Currently, there are several supported regions: `US`, `EU`, `UK`, `ASIA`. Blank is handled as `US`. | | | log\_key | By default, the whole log record will be sent to Google Chronicle. If you specify a key name with this option, then only the value of that key will be sent to Google Chronicle. | | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | See Google's [official documentation](https://cloud.google.com/chronicle/docs/reference/ingestion-api) for further details. diff --git a/pipeline/outputs/cloudwatch.md b/pipeline/outputs/cloudwatch.md index 74a17c673..bfcd2ba2d 100644 --- a/pipeline/outputs/cloudwatch.md +++ b/pipeline/outputs/cloudwatch.md @@ -34,6 +34,7 @@ See [here](https://github.com/fluent/fluent-bit-docs/tree/43c4fe134611da471e706b | profile | Option to specify an AWS Profile for credentials. Defaults to `default` | | auto\_retry\_requests | Immediately retry failed requests to AWS services once. This option does not affect the normal Fluent Bit retry mechanism with backoff. Instead, it enables an immediate retry with no delay for networking errors, which may help improve throughput when there are transient/random networking issues. 
This option defaults to `true`. | | external\_id | Specify an external ID for the STS API, can be used with the role\_arn parameter if your role requires an external ID. | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. Default: `1`. | ## Getting Started @@ -60,6 +61,16 @@ In your main configuration file append the following _Output_ section: log_stream_prefix from-fluent-bit- auto_create_group On ``` +#### Intergration with Localstack (Cloudwatch Logs) + +For an instance of Localstack running at `http://localhost:4566`, the following configuration needs to be added to the `[OUTPUT]` section: + +```text +endpoint localhost +port 4566 +``` + +Any testing credentials can be exported as local variables, such as `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`. ### Permissions @@ -80,28 +91,6 @@ The following AWS IAM permissions are required to use this plugin: } ``` -### Worker support - -Fluent Bit 1.7 adds a new feature called `workers` which enables outputs to have dedicated threads. This `cloudwatch_logs` plugin has partial support for workers in Fluent Bit 2.1.11 and prior. **2.1.11 and prior, the plugin can support a single worker; enabling multiple workers will lead to errors/indeterminate behavior.** -Starting from Fluent Bit 2.1.12, the `cloudwatch_logs` plugin added full support for workers, meaning that more than one worker can be configured. - -Example: - -``` -[OUTPUT] - Name cloudwatch_logs - Match * - region us-east-1 - log_group_name fluent-bit-cloudwatch - log_stream_prefix from-fluent-bit- - auto_create_group On - workers 1 -``` - -If you enable workers, you are enabling one or more dedicated threads for your CloudWatch output. -We recommend starting with 1 worker, evaluating the performance, and then enabling more workers if needed. -For most users, the plugin can provide sufficient throughput with 0 or 1 workers. - ### Log Stream and Group Name templating using record\_accessor syntax Sometimes, you may want the log group or stream name to be based on the contents of the log record itself. This plugin supports templating log group and stream names using Fluent Bit [record\_accessor](https://docs.fluentbit.io/manual/administration/configuring-fluent-bit/classic-mode/record-accessor) syntax. @@ -271,4 +260,4 @@ You can use our SSM Public Parameters to find the Amazon ECR image URI in your r aws ssm get-parameters-by-path --path /aws/service/aws-for-fluent-bit/ ``` -For more see [the AWS for Fluent Bit github repo](https://github.com/aws/aws-for-fluent-bit#public-images). +For more see [the AWS for Fluent Bit github repo](https://github.com/aws/aws-for-fluent-bit#public-images). \ No newline at end of file diff --git a/pipeline/outputs/dash0.md b/pipeline/outputs/dash0.md new file mode 100644 index 000000000..59a858706 --- /dev/null +++ b/pipeline/outputs/dash0.md @@ -0,0 +1,64 @@ +--- +description: Send logs to Dash0 +--- + +# Dash0 + +Stream logs to [Dash0](https://www.dash0.com) by utilizing the [OpenTelemetry plugin](opentelemetry.md) to send data to the Dash0 log ingress. + +## Configuration parameters + +| Key | Description | Default | +| -------------------------- | ----------- | ------- | +| `header` | The specific header for bearer authorization, where {your-Auth-token-here} is your Dash0 Auth Token. | Authorization Bearer {your-Auth-token-here} | +| `host` | Your Dash0 ingress endpoint. | `ingress.eu-west-1.aws.dash0.com` | +| `port` | TCP port of your Dash0 ingress endpoint. 
| `443` | +| `metrics_uri` | Specify an optional HTTP URI for the target web server listening for metrics | `/v1/metrics` | +| `logs_uri` | Specify an optional HTTP URI for the target web server listening for logs | `/v1/logs` | +| `traces_uri` | Specify an optional HTTP URI for the target web server listening for traces | `/v1/traces` | + +### TLS / SSL + +The OpenTelemetry output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). + +## Getting started + +To get started with sending logs to Dash0: + +1. Get an [Auth Token](https://www.dash0.com/documentation/dash0/key-concepts/auth-tokens) from **Settings** > **Auth Tokens**. +1. In your main Fluent Bit configuration file, append the following `Output` section: + +{% tabs %} +{% tab title="fluent-bit.conf" %} +```text +[OUTPUT] + Name opentelemetry + Match * + Host ingress.eu-west-1.aws.dash0.com + Port 443 + Header Authorization Bearer {your-Auth-token-here} + Metrics_uri /v1/metrics + Logs_uri /v1/logs + Traces_uri /v1/traces +``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml +[OUTPUT] + Name: opentelemetry + Match: * + Host: ingress.eu-west-1.aws.dash0.com + Port: 443 + Header: Authorization Bearer {your-Auth-token-here} + Metrics_uri: /v1/metrics + Logs_uri: /v1/logs + Traces_uri: /v1/traces +``` +{% endtab %} +{% endtabs %} + +## References + +- [Dash0 documentation](https://www.dash0.com/documentation/dash0) \ No newline at end of file diff --git a/pipeline/outputs/datadog.md b/pipeline/outputs/datadog.md index 20204700d..9b7f3cbe4 100644 --- a/pipeline/outputs/datadog.md +++ b/pipeline/outputs/datadog.md @@ -10,21 +10,24 @@ Before you begin, you need a [Datadog account](https://app.datadoghq.com/signup) ## Configuration Parameters -| Key | Description | Default | -| --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------- | -| Host | _Required_ - The Datadog server where you are sending your logs. | `http-intake.logs.datadoghq.com` | -| TLS | _Required_ - End-to-end security communications security protocol. Datadog recommends setting this to `on`. | `off` | -| compress | _Recommended_ - compresses the payload in GZIP format, Datadog supports and recommends setting this to `gzip`. | | -| apikey | _Required_ - Your [Datadog API key](https://app.datadoghq.com/account/settings#api). | | -| Proxy | _Optional_ - Specify an HTTP Proxy. The expected format of this value is [http://host:port](http://host/:port). Note that _https_ is **not** supported yet. | | -| provider | To activate the remapping, specify configuration flag provider with value `ecs`. | | -| json_date_key | Date key name for output. | `timestamp` | -| include_tag_key | If enabled, a tag is appended to output. The key name is used `tag_key` property. | `false` | -| tag_key | The key name of tag. If `include_tag_key` is false, This property is ignored. | `tagkey` | -| dd_service | _Recommended_ - The human readable name for your service generating the logs - the name of your application or database. | | -| dd_source | _Recommended_ - A human readable name for the underlying technology of your service. For example, `postgres` or `nginx`. | | -| dd_tags | _Optional_ - The [tags](https://docs.datadoghq.com/tagging/) you want to assign to your logs in Datadog. 
| | -| dd_message_key | By default, the plugin searches for the key 'log' and remap the value to the key 'message'. If the property is set, the plugin will search the property name key. | | +| Key | Description | Default | +| --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------- | +| Host | _Required_ - The Datadog server where you are sending your logs. | `http-intake.logs.datadoghq.com` | +| TLS | _Required_ - End-to-end security communications security protocol. Datadog recommends setting this to `on`. | `off` | +| compress | _Recommended_ - compresses the payload in GZIP format, Datadog supports and recommends setting this to `gzip`. | | +| apikey | _Required_ - Your [Datadog API key](https://app.datadoghq.com/account/settings#api). | | +| Proxy | _Optional_ - Specify an HTTP Proxy. The expected format of this value is [http://host:port](http://host/:port). Note that _https_ is **not** supported yet. | | +| provider | To activate the remapping, specify configuration flag provider with value `ecs`. | | +| json_date_key | Date key name for output. | `timestamp` | +| include_tag_key | If enabled, a tag is appended to output. The key name is used `tag_key` property. | `false` | +| tag_key | The key name of tag. If `include_tag_key` is false, This property is ignored. | `tagkey` | +| dd_service | _Recommended_ - The human readable name for your service generating the logs (e.g. the name of your application or database). If unset, Datadog will look for the service using [Service Remapper](https://docs.datadoghq.com/logs/log_configuration/pipelines/?tab=service#service-attribute)." | | +| dd_source | _Recommended_ - A human readable name for the underlying technology of your service (e.g. `postgres` or `nginx`). If unset, Datadog will look for the source in the [`ddsource` attribute](https://docs.datadoghq.com/logs/log_configuration/pipelines/?tab=source#source-attribute). | | +| dd_tags | _Optional_ - The [tags](https://docs.datadoghq.com/tagging/) you want to assign to your logs in Datadog. If unset, Datadog will look for the tags in the [`ddtags` attribute](https://docs.datadoghq.com/api/latest/logs/#send-logs). | | +| dd_message_key | By default, the plugin searches for the key 'log' and remap the value to the key 'message'. If the property is set, the plugin will search the property name key. | | +| dd_hostname | The host the emitted logs should be associated with. If unset, Datadog expects the host to be set with `host`, `hostname`, or `syslog.hostname` attributes. See [Datadog Logs preprocessor documentation](https://docs.datadoghq.com/logs/log_configuration/pipelines/?tab=host#preprocessing) for recognized attributes. | _none_ | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | +| header | Add additional arbitrary HTTP header key/value pair. Multiple headers can be set. 
| _none_ |
 
 ### Configuration File
 
@@ -41,6 +44,7 @@ Get started quickly with this configuration file:
     dd_service <my-app-service>
     dd_source <my-app-source>
     dd_tags team:logs,foo:bar
+    dd_hostname myhost
 ```
 
 ## Troubleshooting
diff --git a/pipeline/outputs/dynatrace.md b/pipeline/outputs/dynatrace.md
new file mode 100644
index 000000000..03cfe6250
--- /dev/null
+++ b/pipeline/outputs/dynatrace.md
@@ -0,0 +1,59 @@
+---
+description: Send logs to Dynatrace
+---
+
+# Dynatrace
+
+Stream logs to [Dynatrace](https://www.dynatrace.com) by utilizing the **http**
+plugin to send data to the
+[Dynatrace generic log ingest API](https://docs.dynatrace.com/docs/shortlink/lma-generic-log-ingestion).
+
+## Configuration parameters
+
+| Key | Description | Default |
+| -------------------------- | ----------- | ------- |
+| `header` | The specific header for content-type. | `Content-Type application/json; charset=utf-8` |
+| `header` | The specific header for the authorization token, where {your-API-token-here} is the Dynatrace API token with log ingest scope. | Authorization Api-Token {your-API-token-here} |
+| `allow_duplicated_headers` | Specifies duplicated header use. | `false` |
+| `host` | Your Dynatrace environment hostname where {your-environment-id} is your environment ID. | `{your-environment-id}.live.dynatrace.com` |
+| `port` | TCP port of your Dynatrace host. | `443` |
+| `uri` | Specify the HTTP URI for the Dynatrace log ingest API. | `/api/v2/logs/ingest` |
+| `format` | The data format to be used in the HTTP request body. | `json` |
+| `json_date_format` | Date format standard for JSON. | `iso8601` |
+| `json_date_key` | Field name specifying the message timestamp. | `timestamp` |
+| `tls` | Specify to use TLS. | `on` |
+| `tls.verify` | TLS verification. | `on` |
+
+## Getting started
+
+To get started with sending logs to Dynatrace:
+
+1. Get a [Dynatrace API](https://docs.dynatrace.com/docs/shortlink/api-authentication)
+   token with the `logs.ingest` (Ingest Logs) scope.
+1. Determine your Dynatrace
+   [environment ID](https://docs.dynatrace.com/docs/shortlink/monitoring-environment#environment-id).
+1. In your main Fluent Bit configuration file, append the following `Output` section:
+
+    ```text
+    [OUTPUT]
+        name  http
+        match *
+        header Content-Type application/json; charset=utf-8
+        header Authorization Api-Token {your-API-token-here}
+        allow_duplicated_headers false
+        host  {your-environment-id}.live.dynatrace.com
+        Port  443
+        URI   /api/v2/logs/ingest
+        Format json
+        json_date_format iso8601
+        json_date_key timestamp
+        tls   On
+        tls.verify  On
+    ```
+
+## References
+
+- [Dynatrace Fluent Bit documentation](https://docs.dynatrace.com/docs/shortlink/lma-stream-logs-with-fluent-bit)
+- [Fluent Bit integration in Dynatrace Hub](https://www.dynatrace.com/hub/detail/fluent-bit/?filter=log-management-and-analytics)
+- [Video: Stream a Log File to Dynatrace using Fluent Bit](https://www.youtube.com/watch?v=JJJNxhtJ6R0)
+- [Blog: Easily stream logs from Fluent Bit to Dynatrace](https://www.dynatrace.com/news/blog/easily-stream-logs-with-fluent-bit-to-dynatrace/)
\ No newline at end of file
diff --git a/pipeline/outputs/elasticsearch.md b/pipeline/outputs/elasticsearch.md
index 7f6f4a708..d248d2338 100644
--- a/pipeline/outputs/elasticsearch.md
+++ b/pipeline/outputs/elasticsearch.md
@@ -4,78 +4,91 @@ description: Send logs to Elasticsearch (including Amazon OpenSearch Service)
 
 # Elasticsearch
 
-The **es** output plugin, allows to ingest your records into an [Elasticsearch](http://www.elastic.co) database. 
The following instructions assumes that you have a fully operational Elasticsearch service running in your environment. +The **es** output plugin lets you ingest your records into an +[Elasticsearch](http://www.elastic.co) database. To use this plugin, you must have an +operational Elasticsearch service running in your environment. ## Configuration Parameters -| Key | Description | default | +| Key | Description | Default | | :--- | :--- | :--- | -| Host | IP address or hostname of the target Elasticsearch instance | 127.0.0.1 | -| Port | TCP port of the target Elasticsearch instance | 9200 | -| Path | Elasticsearch accepts new data on HTTP query path "/\_bulk". But it is also possible to serve Elasticsearch behind a reverse proxy on a subpath. This option defines such path on the fluent-bit side. It simply adds a path prefix in the indexing HTTP POST URI. | Empty string | -| compress | Set payload compression mechanism. Option available is 'gzip' | | -| Buffer\_Size | Specify the buffer size used to read the response from the Elasticsearch HTTP service. This option is useful for debugging purposes where is required to read full responses, note that response size grows depending of the number of records inserted. To set an _unlimited_ amount of memory set this value to **False**, otherwise the value must be according to the [Unit Size](../../administration/configuring-fluent-bit/unit-sizes.md) specification. | 512KB | -| Pipeline | Newer versions of Elasticsearch allows to setup filters called pipelines. This option allows to define which pipeline the database should use. For performance reasons is strongly suggested to do parsing and filtering on Fluent Bit side, avoid pipelines. | | -| AWS\_Auth | Enable AWS Sigv4 Authentication for Amazon OpenSearch Service | Off | -| AWS\_Region | Specify the AWS region for Amazon OpenSearch Service | | -| AWS\_STS\_Endpoint | Specify the custom sts endpoint to be used with STS API for Amazon OpenSearch Service | | -| AWS\_Role\_ARN | AWS IAM Role to assume to put records to your Amazon cluster | | -| AWS\_External\_ID | External ID for the AWS IAM Role specified with `aws_role_arn` | | -| AWS\_Service\_Name | Service name to be used in AWS Sigv4 signature. For integration with Amazon OpenSearch Serverless, set to `aoss`. See the [FAQ](opensearch.md#faq) section on Amazon OpenSearch Serverless for more information. | es | -| AWS\_Profile | AWS profile name | default | -| Cloud\_ID | If you are using Elastic's Elasticsearch Service you can specify the cloud\_id of the cluster running. The Cloud ID string has the format `<deployment_name>:<base64_info>`. Once decoded, the `base64_info` string has the format `<deployment_region>$<elasticsearch_hostname>$<kibana_hostname>`. - | | -| Cloud\_Auth | Specify the credentials to use to connect to Elastic's Elasticsearch Service running on Elastic Cloud | | -| HTTP\_User | Optional username credential for Elastic X-Pack access | | -| HTTP\_Passwd | Password for user defined in HTTP\_User | | -| Index | Index name | fluent-bit | -| Type | Type name | \_doc | -| Logstash\_Format | Enable Logstash format compatibility. This option takes a boolean value: True/False, On/Off | Off | -| Logstash\_Prefix | When Logstash\_Format is enabled, the Index name is composed using a prefix and the date, e.g: If Logstash\_Prefix is equals to 'mydata' your index will become 'mydata-YYYY.MM.DD'. The last string appended belongs to the date when the data is being generated. 
| logstash | -| Logstash\_Prefix\_Key | When included: the value of the key in the record will be evaluated as key reference and overrides Logstash\_Prefix for index generation. If the key/value is not found in the record then the Logstash\_Prefix option will act as a fallback. The parameter is expected to be a [record accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md). | | -| Logstash\_Prefix\_Separator | Set a separator between logstash_prefix and date.| - | -| Logstash\_DateFormat | Time format \(based on [strftime](http://man7.org/linux/man-pages/man3/strftime.3.html)\) to generate the second part of the Index name. | %Y.%m.%d | -| Time\_Key | When Logstash\_Format is enabled, each record will get a new timestamp field. The Time\_Key property defines the name of that field. | @timestamp | -| Time\_Key\_Format | When Logstash\_Format is enabled, this property defines the format of the timestamp. | %Y-%m-%dT%H:%M:%S | -| Time\_Key\_Nanos | When Logstash\_Format is enabled, enabling this property sends nanosecond precision timestamps. | Off | -| Include\_Tag\_Key | When enabled, it append the Tag name to the record. | Off | -| Tag\_Key | When Include\_Tag\_Key is enabled, this property defines the key name for the tag. | \_flb-key | -| Generate\_ID | When enabled, generate `_id` for outgoing records. This prevents duplicate records when retrying ES. | Off | -| Id\_Key | If set, `_id` will be the value of the key from incoming record and `Generate_ID` option is ignored. | | -| Write\_Operation | The write\_operation can be any of: create (default), index, update, upsert. | create | -| Replace\_Dots | When enabled, replace field name dots with underscore, required by Elasticsearch 2.0-2.3. | Off | -| Trace\_Output | Print all elasticsearch API request payloads to stdout \(for diag only\) | Off | -| Trace\_Error | If elasticsearch return an error, print the elasticsearch API request and response \(for diag only\) | Off | -| Current\_Time\_Index | Use current time for index generation instead of message record | Off | -| Suppress\_Type\_Name | When enabled, mapping types is removed and `Type` option is ignored. If using Elasticsearch 8.0.0 or higher - it [no longer supports mapping types](https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html), so it shall be set to On. | Off | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 2 | - -> The parameters _index_ and _type_ can be confusing if you are new to Elastic, if you have used a common relational database before, they can be compared to the _database_ and _table_ concepts. Also see [the FAQ below](elasticsearch.md#faq) +| `Host` | IP address or hostname of the target Elasticsearch instance | `127.0.0.1` | +| `Port` | TCP port of the target Elasticsearch instance | `9200` | +| `Path` | Elasticsearch accepts new data on HTTP query path `/_bulk`. You can also serve Elasticsearch behind a reverse proxy on a sub-path. Define the path by adding a path prefix in the indexing HTTP POST URI. | Empty string | +| `compress` | Set payload compression mechanism. Option available is `gzip`. | _none_ | +| `Buffer_Size` | Specify the buffer size used to read the response from the Elasticsearch HTTP service. Use for debugging purposes where required to read full responses. Response size grows depending of the number of records inserted. To use an unlimited amount of memory, set this value to `False`. 
Otherwise set the value according to the [Unit Size](../../administration/configuring-fluent-bit/unit-sizes.md). | `512KB` | +| `Pipeline` | Define which pipeline the database should use. For performance reasons, it's strongly suggested to do parsing and filtering on Fluent Bit side, and avoid pipelines. | _none_ | +| `AWS_Auth` | Enable AWS Sigv4 Authentication for Amazon OpenSearch Service. | `Off` | +| `AWS_Region` | Specify the AWS region for Amazon OpenSearch Service. | _none_ | +| `AWS_STS_Endpoint` | Specify the custom STS endpoint to be used with STS API for Amazon OpenSearch Service | _none_ | +| `AWS_Role_ARN` | AWS IAM Role to assume to put records to your Amazon cluster | _none_ | +| `AWS_External_ID` | External ID for the AWS IAM Role specified with `aws_role_arn` | _none_ | +| `AWS_Service_Name` | Service name to use in AWS Sigv4 signature. For integration with Amazon OpenSearch Serverless, set to `aoss`. See [Amazon OpenSearch Serverless](opensearch.md) for more information. | `es` | +| `AWS_Profile` | AWS profile name | `default` | +| `Cloud_ID` | If using Elastic's Elasticsearch Service you can specify the `cloud_id` of the cluster running. The string has the format `<deployment_name>:<base64_info>`. Once decoded, the `base64_info` string has the format `<deployment_region>$<elasticsearch_hostname>$<kibana_hostname>`. | _none_ | +| `Cloud_Auth` | Specify the credentials to use to connect to Elastic's Elasticsearch Service running on Elastic Cloud | _none_ | +| `HTTP_User` | Optional username credential for Elastic X-Pack access | _none_ | +| `HTTP_Passwd` | Password for user defined in `HTTP_User` | _none_ | +| `Index` | Index name | `fluent-bit` | +| `Type` | Type name | `_doc` | +| `Logstash_Format` | Enable Logstash format compatibility. This option takes a Boolean value: `True/False`, `On/Off` | `Off` | +| `Logstash_Prefix` | When `Logstash_Format` is enabled, the Index name is composed using a prefix and the date, e.g: If `Logstash_Prefix` is equal to `mydata` your index will become `mydata-YYYY.MM.DD`. The last string appended belongs to the date when the data is being generated. | `logstash` | +| `Logstash_Prefix_Key` | When included: the value of the key in the record will be evaluated as key reference and overrides `Logstash_Prefix` for index generation. If the key/value isn't found in the record then the `Logstash_Prefix` option will act as a fallback. The parameter is expected to be a [record accessor](../../administration/configuring-fluent-bit/classic-mode/record-accessor.md). | _none_ | +| `Logstash_Prefix_Separator` | Set a separator between `Logstash_Prefix` and date.| `-` | +| `Logstash_DateFormat` | Time format based on [strftime](http://man7.org/linux/man-pages/man3/strftime.3.html) to generate the second part of the Index name. | `%Y.%m.%d` | +| `Time_Key` | When `Logstash_Format` is enabled, each record will get a new timestamp field. The `Time_Key` property defines the name of that field. | `@timestamp` | +| `Time_Key_Format` | When `Logstash_Format` is enabled, this property defines the format of the timestamp. | `%Y-%m-%dT%H:%M:%S` | +| `Time_Key_Nanos` | When `Logstash_Format` is enabled, enabling this property sends nanosecond precision timestamps. | `Off` | +| `Include_Tag_Key` | When enabled, it append the Tag name to the record. | `Off` | +| `Tag_Key` | When `Include_Tag_Key` is enabled, this property defines the key name for the tag. | `_flb-key` | +| `Generate_ID` | When enabled, generate `_id` for outgoing records. 
This prevents duplicate records when retrying ES. | `Off` | +| `Id_Key` | If set, `_id` will be the value of the key from incoming record and `Generate_ID` option is ignored. | _none_ | +| `Write_Operation` | `Write_operation` can be any of: `create`, `index`, `update`, `upsert`. | `create` | +| `Replace_Dots` | When enabled, replace field name dots with underscore. Required by Elasticsearch 2.0-2.3. | `Off` | +| `Trace_Output` | Print all ElasticSearch API request payloads to `stdout` for diagnostics. | `Off` | +| `Trace_Error` | If ElasticSearch returns an error, print the ElasticSearch API request and response for diagnostics. | `Off` | +| `Current_Time_Index` | Use current time for index generation instead of message record. | `Off` | +| `Suppress_Type_Name` | When enabled, mapping types is removed and `Type` option is ignored. Elasticsearch 8.0.0 or higher [no longer supports mapping types](https://www.elastic.co/guide/en/elasticsearch/reference/current/removal-of-types.html), and is set to `On`. | `Off` | +| `Workers` | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `2` | + +If you have used a common relational database, the parameters `index` and `type` can +be compared to the `database` and `table` concepts. ### TLS / SSL -Elasticsearch output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The Elasticsearch output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). -### write\_operation +### `write_operation` -The write\_operation can be any of: +The `write_operation` can be any of: -| Operation | Description | -| ------------- | ----------- | -| create (default) | adds new data - if the data already exists (based on its id), the op is skipped.| -| index | new data is added while existing data (based on its id) is replaced (reindexed).| -| update | updates existing data (based on its id). If no data is found, the op is skipped.| -| upsert | known as merge or insert if the data does not exist, updates if the data exists (based on its id).| +| Operation | Description | +| ----------- | ----------- | +| `create` | Adds new data. If the data already exists (based on its id), the op is skipped.| +| `index` | New data is added while existing data (based on its id) is replaced (reindexed).| +| `update` | Updates existing data (based on its id). If no data is found, the op is skipped. | +| `upsert` | Merge or insert if the data doesn't exist, updates if the data exists (based on its id).| -**Please note, `Id_Key` or `Generate_ID` is required in update, and upsert scenario.** +{% hint style="info" %} -## Getting Started +`Id_Key` or `Generate_ID` is required for `update` and `upsert`. -In order to insert records into a Elasticsearch service, you can run the plugin from the command line or through the configuration file: +{% endhint %} + +## Get started + +To insert records into an Elasticsearch service, you run the plugin from the +command line or through the configuration file: ### Command Line -The **es** plugin, can read the parameters from the command line in two ways, through the **-p** argument \(property\) or setting them directly through the service URI. 
The URI format is the following: +The **es** plugin can read the parameters from the command line in two ways: + +- Through the `-p` argument (property). +- Setting them directly through the service URI. + +The URI format is the following: ```text es://host:port/index/type @@ -83,21 +96,21 @@ es://host:port/index/type Using the format specified, you could start Fluent Bit through: -```text -$ fluent-bit -i cpu -t cpu -o es://192.168.2.3:9200/my_index/my_type \ +```shell copy +fluent-bit -i cpu -t cpu -o es://192.168.2.3:9200/my_index/my_type \ -o stdout -m '*' ``` -which is similar to do: +Which is similar to the following command: -```text -$ fluent-bit -i cpu -t cpu -o es -p Host=192.168.2.3 -p Port=9200 \ +```shell copy +fluent-bit -i cpu -t cpu -o es -p Host=192.168.2.3 -p Port=9200 \ -p Index=my_index -p Type=my_type -o stdout -m '*' ``` ### Configuration File -In your main configuration file append the following _Input_ & _Output_ sections. You can visualize this configuration [here](https://link.calyptia.com/qhq) +In your main configuration file append the following `Input` and `Output` sections. ```python [INPUT] @@ -113,11 +126,13 @@ In your main configuration file append the following _Input_ & _Output_ sections Type my_type ``` - + ## About Elasticsearch field names -Some input plugins may generate messages where the field names contains dots, since Elasticsearch 2.0 this is not longer allowed, so the current **es** plugin replaces them with an underscore, e.g: +Some input plugins can generate messages where the field names contains dots. For +Elasticsearch 2.0, this isn't allowed. The current **es** plugin replaces +them with an underscore: ```text {"cpu0.p_cpu"=>17.000000} @@ -129,58 +144,21 @@ becomes {"cpu0_p_cpu"=>17.000000} ``` -## FAQ - -### Elasticsearch rejects requests saying "the final mapping would have more than 1 type" <a id="faq-multiple-types"></a> - -Since Elasticsearch 6.0, you cannot create multiple types in a single index. This means that you cannot set up your configuration as below anymore. - -```text -[OUTPUT] - Name es - Match foo.* - Index search - Type type1 - -[OUTPUT] - Name es - Match bar.* - Index search - Type type2 -``` - -If you see an error message like below, you'll need to fix your configuration to use a single type on each index. +## Use Fluent Bit ElasticSearch plugin with other services -> Rejecting mapping update to \[search\] as the final mapping would have more than 1 type +Connect to Amazon OpenSearch or Elastic Cloud with the ElasticSearch plugin. -For details, please read [the official blog post on that issue](https://www.elastic.co/guide/en/elasticsearch/reference/6.7/removal-of-types.html). +### Amazon OpenSearch Service -### Elasticsearch rejects requests saying "Document mapping type name can't start with '\_'" <a id="faq-underscore"></a> +The Amazon OpenSearch Service adds an extra security layer where HTTP requests must +be signed with AWS Sigv4. Fluent Bit v1.5 introduced full support for Amazon +OpenSearch Service with IAM Authentication. -Fluent Bit v1.5 changed the default mapping type from `flb_type` to `_doc`, which matches the recommendation from Elasticsearch from version 6.2 forwards \([see commit with rationale](https://github.com/fluent/fluent-bit/commit/04ed3d8104ca8a2f491453777ae6e38e5377817e#diff-c9ae115d3acaceac5efb949edbb21196)\). 
This doesn't work in Elasticsearch versions 5.6 through 6.1 \([see Elasticsearch discussion and fix](https://discuss.elastic.co/t/cant-use-doc-as-type-despite-it-being-declared-the-preferred-method/113837/9)\). Ensure you set an explicit map \(such as `doc` or `flb_type`\) in the configuration, as seen on the last line: - -```text -[OUTPUT] - Name es - Match * - Host vpc-test-domain-ke7thhzoo7jawsrhmm6mb7ite7y.us-west-2.es.amazonaws.com - Port 443 - Index my_index - AWS_Auth On - AWS_Region us-west-2 - tls On - Type doc -``` - -### Fluent Bit + Amazon OpenSearch Service <a id="#aws-es"></a> - -The Amazon OpenSearch Service adds an extra security layer where HTTP requests must be signed with AWS Sigv4. Fluent Bit v1.5 introduced full support for Amazon OpenSearch Service with IAM Authentication. - -See [here](https://github.com/fluent/fluent-bit-docs/tree/43c4fe134611da471e706b0edb2f9acd7cdfdbc3/administration/aws-credentials.md) for details on how AWS credentials are fetched. +See [details](https://github.com/fluent/fluent-bit-docs/tree/43c4fe134611da471e706b0edb2f9acd7cdfdbc3/administration/aws-credentials.md) on how AWS credentials are fetched. Example configuration: -```text +```text copy [OUTPUT] Name es Match * @@ -193,16 +171,20 @@ Example configuration: tls On ``` -Notice that the `Port` is set to `443`, `tls` is enabled, and `AWS_Region` is set. +Be aware that the `Port` is set to `443`, `tls` is enabled, and `AWS_Region` is set. -### Fluent Bit + Elastic Cloud +### Use Fluent Bit with Elastic Cloud -Fluent Bit supports connecting to [Elastic Cloud](https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html) providing just the `cloud_id` and the `cloud_auth` settings. -`cloud_auth` uses the `elastic` user and password provided when the cluster was created, for details refer to the [Cloud ID usage page](https://www.elastic.co/guide/en/cloud/current/ec-cloud-id.html). +Fluent Bit supports connecting to +[Elastic Cloud](https://www.elastic.co/guide/en/cloud/current/ec-getting-started.html) +by providing the `cloud_id` and the `cloud_auth` settings. `cloud_auth` uses the +`elastic` user and password provided when the cluster was created. For details refer +to the +[Cloud ID usage page](https://www.elastic.co/guide/en/cloud/current/ec-cloud-id.html). Example configuration: -```text +```text copy [OUTPUT] Name es Include_Tag_Key true @@ -214,35 +196,99 @@ Example configuration: cloud_auth elastic:2vxxxxxxxxYV ``` -### Validation Failed: 1: an id must be provided if version type or value are set +In Elastic Cloud version 8 and great, the type option must be removed by setting +`Suppress_Type_Name On`. + +Without this you will see errors like: + +```text +{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"Action/metadata line [1] contains an unknown parameter [_type]"}],"type":"illegal_argument_exception","reason":"Action/metadata line [1] contains an unknown parameter [_type]"},"status":400} +``` -Since v1.8.2, Fluent Bit started using `create` method (instead of `index`) for data submission. -This makes Fluent Bit compatible with Datastream introduced in Elasticsearch 7.9. +## Troubleshooting -If you see `action_request_validation_exception` errors on your pipeline with Fluent Bit >= v1.8.2, you can fix it up by turning on `Generate_ID` as follows: +Use the following information to help resolve errors using the ElasticSearch plugin. + +### Using multiple types in a single index + +Elasticsearch 6.0 can't create multiple types in a single index. 
An error message +like the following indicates you need to update your configuration to use a single +type on each index. + +```text +Rejecting mapping update to [products] as the final mapping would have more than 1 type: +``` + +This means that you can't set up your configuration like the following:. ```text [OUTPUT] - Name es - Match * - Host 192.168.12.1 - Generate_ID on + Name es + Match foo.* + Index search + Type type1 + +[OUTPUT] + Name es + Match bar.* + Index search + Type type2 ``` -### Action/metadata contains an unknown parameter type +For details, read [the official blog post on that issue](https://www.elastic.co/guide/en/elasticsearch/reference/6.7/removal-of-types.html). -Elastic Cloud is now on version 8 so the type option must be removed by setting `Suppress_Type_Name On` as indicated above. +### Mapping type names can't start with underscores (`_`) -Without this you will see errors like: +Fluent Bit v1.5 changed the default mapping type from `flb_type` to `_doc`, matching +the recommendation from Elasticsearch for version 6.2 and greater +([see commit with +rationale](https://github.com/fluent/fluent-bit/commit/04ed3d8104ca8a2f491453777ae6e38e5377817e#diff-c9ae115d3acaceac5efb949edbb21196)). + +This doesn't work in Elasticsearch versions 5.6 through 6.1 +([discussion and fix](https://discuss.elastic.co/t/cant-use-doc-as-type-despite-it-being-declared-the-preferred-method/113837/9)). + +Ensure you set an explicit map such as `doc` or `flb_type` in the configuration, +as seen on the last line: + +```text copy +[OUTPUT] + Name es + Match * + Host vpc-test-domain-ke7thhzoo7jawsrhmm6mb7ite7y.us-west-2.es.amazonaws.com + Port 443 + Index my_index + AWS_Auth On + AWS_Region us-west-2 + tls On + Type doc +``` + +### Validation failures + +In Fluent Bit v1.8.2 and greater, Fluent Bit started using `create` method (instead +of `index`) for data submission. This makes Fluent Bit compatible with Datastream, +introduced in Elasticsearch 7.9. You might see errors like: ```text -{"error":{"root_cause":[{"type":"illegal_argument_exception","reason":"Action/metadata line [1] contains an unknown parameter [_type]"}],"type":"illegal_argument_exception","reason":"Action/metadata line [1] contains an unknown parameter [_type]"},"status":400} +Validation Failed: 1: an id must be provided if version type or value are set +``` + +If you see `action_request_validation_exception` errors on your pipeline with +Fluent Bit versions greater than v1.8.2, correct them by turning on `Generate_ID` +as follows: + +```text copy +[OUTPUT] + Name es + Match * + Host 192.168.12.1 + Generate_ID on ``` -### Logstash_Prefix_Key +### `Logstash_Prefix_Key` The following snippet demonstrates using the namespace name as extracted by the -`kubernetes` filter as logstash prefix: +`kubernetes` filter as `logstash` prefix: ```text [OUTPUT] @@ -250,8 +296,9 @@ The following snippet demonstrates using the namespace name as extracted by the Match * # ... Logstash_Prefix logstash - Logstash_Prefix_Key $kubernetes["namespace_name"] + Logstash_Prefix_Key $kubernetes['namespace_name'] # ... ``` -For records that do nor have the field `kubernetes.namespace_name`, the default prefix, `logstash` will be used. +For records that don't have the field `kubernetes.namespace_name`, the default prefix +`logstash` will be used. 
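+
+As a sketch of the resulting index names (the namespace and date values below are
+illustrative), a record carrying `kubernetes.namespace_name=default` would land in
+an index such as `default-2024.03.15`, while a record without that field falls back
+to `logstash-2024.03.15`. The same output can also be written in the YAML
+configuration format shown elsewhere in this guide:
+
+```yaml
+pipeline:
+  outputs:
+    - name: es
+      match: '*'
+      host: 192.168.12.1
+      logstash_format: on
+      logstash_prefix: logstash
+      # Record accessor pointing at the namespace extracted by the kubernetes filter
+      logstash_prefix_key: "$kubernetes['namespace_name']"
+```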
diff --git a/pipeline/outputs/file.md b/pipeline/outputs/file.md index 5dde1b862..b7bd9d511 100644 --- a/pipeline/outputs/file.md +++ b/pipeline/outputs/file.md @@ -12,7 +12,7 @@ The plugin supports the following configuration parameters: | File | Set file name to store the records. If not set, the file name will be the _tag_ associated with the records. | | Format | The format of the file content. See also Format section. Default: out\_file. | | Mkdir | Recursively create output directory if it does not exist. Permissions set to 0755. | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 1 | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `1` | ## Format @@ -75,6 +75,7 @@ For example, if you set up the configuration as below: [OUTPUT] Name file + Match * Format template Template {time} used={Mem.used} free={Mem.free} total={Mem.total} ``` @@ -111,4 +112,3 @@ In your main configuration file append the following Input & Output sections: Match * Path output_dir ``` - diff --git a/pipeline/outputs/firehose.md b/pipeline/outputs/firehose.md index e896610c9..d4a8d831a 100644 --- a/pipeline/outputs/firehose.md +++ b/pipeline/outputs/firehose.md @@ -28,6 +28,7 @@ See [here](https://github.com/fluent/fluent-bit-docs/tree/43c4fe134611da471e706b | auto\_retry\_requests | Immediately retry failed requests to AWS services once. This option does not affect the normal Fluent Bit retry mechanism with backoff. Instead, it enables an immediate retry with no delay for networking errors, which may help improve throughput when there are transient/random networking issues. This option defaults to `true`. | | external\_id | Specify an external ID for the STS API, can be used with the role_arn parameter if your role requires an external ID. | | profile | AWS profile name to use. Defaults to `default`. | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. Default: `1`. | ## Getting Started @@ -132,4 +133,3 @@ aws ssm get-parameters-by-path --path /aws/service/aws-for-fluent-bit/ ``` For more see [the AWS for Fluent Bit github repo](https://github.com/aws/aws-for-fluent-bit#public-images). - diff --git a/pipeline/outputs/flowcounter.md b/pipeline/outputs/flowcounter.md index 69bc75ebd..a6b12e462 100644 --- a/pipeline/outputs/flowcounter.md +++ b/pipeline/outputs/flowcounter.md @@ -9,6 +9,7 @@ The plugin supports the following configuration parameters: | Key | Description | Default | | :--- | :--- | :--- | | Unit | The unit of duration. \(second/minute/hour/day\) | minute | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. 
| `0` | ## Getting Started @@ -42,7 +43,7 @@ In your main configuration file append the following Input & Output sections: Once Fluent Bit is running, you will see the reports in the output interface similar to this: ```bash -$ fluent-bit -i cpu -o flowcounter +$ fluent-bit -i cpu -o flowcounter Fluent Bit v1.x.x * Copyright (C) 2019-2020 The Fluent Bit Authors * Copyright (C) 2015-2018 Treasure Data @@ -52,4 +53,3 @@ Fluent Bit v1.x.x [2016/12/23 11:01:20] [ info] [engine] started [out_flowcounter] cpu.0:[1482458540, {"counts":60, "bytes":7560, "counts/minute":1, "bytes/minute":126 }] ``` - diff --git a/pipeline/outputs/forward.md b/pipeline/outputs/forward.md index 4e6d297f3..df861c52a 100644 --- a/pipeline/outputs/forward.md +++ b/pipeline/outputs/forward.md @@ -22,8 +22,8 @@ The following parameters are mandatory for either Forward for Secure Forward mod | Tag | Overwrite the tag as we transmit. This allows the receiving pipeline start fresh, or to attribute source. | | | Send_options | Always send options (with "size"=count of messages) | False | | Require_ack_response | Send "chunk"-option and wait for "ack" response from server. Enables at-least-once and receiving server can control rate of traffic. (Requires Fluentd v0.14.0+ server) | False | -| Compress | Set to "gzip" to enable gzip compression. Incompatible with Time_as_Integer=True and tags set dynamically using the [Rewrite Tag](https://app.gitbook.com/s/-LKKSx-3LBTCtaHbg0gl-887967055/pipeline/filters/rewrite-tag.md) filter. (Requires Fluentd v0.14.7+ server) | | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 2 | +| Compress | Set to 'gzip' to enable gzip compression. Incompatible with `Time_as_Integer=True` and tags set dynamically using the [Rewrite Tag](../filters/rewrite-tag.md) filter. Requires Fluentd server v0.14.7 or later. | _none_ | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `2` | ## Secure Forward Mode Configuration Parameters diff --git a/pipeline/outputs/gelf.md b/pipeline/outputs/gelf.md index ee115ec10..db401075b 100644 --- a/pipeline/outputs/gelf.md +++ b/pipeline/outputs/gelf.md @@ -14,6 +14,7 @@ According to [GELF Payload Specification](https://go2docs.graylog.org/5-0/gettin | Host | IP address or hostname of the target Graylog server | 127.0.0.1 | | Port | The port that your Graylog GELF input is listening on | 12201 | | Mode | The protocol to use (`tls`, `tcp` or `udp`) | udp | +| Gelf\_Tag\_Key | Key to be used for tag. (_Optional in GELF_) | | | Gelf_Short_Message_Key | A short descriptive message (**MUST be set in GELF**) | short_message | | Gelf_Timestamp_Key | Your log timestamp (_SHOULD be set in GELF_) | timestamp | | Gelf_Host_Key | Key which its value is used as the name of the host, source or application that sent this message. (**MUST be set in GELF**) | host | @@ -21,10 +22,12 @@ According to [GELF Payload Specification](https://go2docs.graylog.org/5-0/gettin | Gelf_Level_Key | Key to be used as the log level. Its value must be in [standard syslog levels](https://en.wikipedia.org/wiki/Syslog#Severity_level) (between 0 and 7). (_Optional in GELF_) | level | | Packet_Size | If transport protocol is `udp`, you can set the size of packets to be sent. | 1420 | | Compress | If transport protocol is `udp`, you can set this if you want your UDP packets to be compressed. 
| true | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ### TLS / SSL -GELF output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The GELF output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). ## Notes diff --git a/pipeline/outputs/http.md b/pipeline/outputs/http.md index 59ed7b5f5..02611730c 100644 --- a/pipeline/outputs/http.md +++ b/pipeline/outputs/http.md @@ -33,11 +33,12 @@ The **http** output plugin allows to flush your records into a HTTP endpoint. Fo | gelf\_level\_key | Specify the key to use for the `level` in _gelf_ format | | | body\_key | Specify the key to use as the body of the request (must prefix with "$"). The key must contain either a binary or raw string, and the content type can be specified using headers\_key (which must be passed whenever body\_key is present). When this option is present, each msgpack record will create a separate request. | | | headers\_key | Specify the key to use as the headers of the request (must prefix with "$"). The key must contain a map, which will have the contents merged on the request headers. This can be used for many purposes, such as specifying the content-type of the data contained in body\_key. | | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 2 | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `2` | ### TLS / SSL -HTTP output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The HTTP output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). ## Getting Started diff --git a/pipeline/outputs/influxdb.md b/pipeline/outputs/influxdb.md index 53a8fe41b..19c7da4b2 100644 --- a/pipeline/outputs/influxdb.md +++ b/pipeline/outputs/influxdb.md @@ -19,10 +19,13 @@ The **influxdb** output plugin, allows to flush your records into a [InfluxDB](h | Tag\_Keys | Space separated list of keys that needs to be tagged | | | Auto\_Tags | Automatically tag keys where value is _string_. This option takes a boolean value: True/False, On/Off. | Off | | Uri | Custom URI endpoint | | +| Add\_Integer\_Suffix | Use integer type of [influxdb's line protocol](https://docs.influxdata.com/influxdb/v1/write_protocols/line_protocol_reference/). This option takes a boolean value: `True|False`, `On|Off`. | `Off` | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ### TLS / SSL -InfluxDB output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The InfluxDB output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). 
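As a quick illustration of the newly documented `Add_Integer_Suffix` option, a minimal sketch follows. The `Host`, `Port`, and `Database` values are placeholder assumptions, not values taken from this page:

```text
[OUTPUT]
    Name               influxdb
    Match              *
    Host               127.0.0.1
    Port               8086
    Database           fluentbit
    Add_Integer_Suffix On
```

With the option enabled, integer values are presumably written using the integer type of InfluxDB's line protocol (an `i` suffix) rather than as floats.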
## Getting Started @@ -207,4 +210,3 @@ key value method "MATCH" method "POST" ``` - diff --git a/pipeline/outputs/kafka-rest-proxy.md b/pipeline/outputs/kafka-rest-proxy.md index 399d57108..6d63cee19 100644 --- a/pipeline/outputs/kafka-rest-proxy.md +++ b/pipeline/outputs/kafka-rest-proxy.md @@ -15,10 +15,12 @@ The **kafka-rest** output plugin, allows to flush your records into a [Kafka RES | Time\_Key\_Format | Defines the format of the timestamp. | %Y-%m-%dT%H:%M:%S | | Include\_Tag\_Key | Append the Tag name to the final record. | Off | | Tag\_Key | If Include\_Tag\_Key is enabled, this property defines the key name for the tag. | \_flb-key | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ### TLS / SSL -Kafka REST Proxy output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The Kafka REST Proxy output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). ## Getting Started @@ -49,4 +51,3 @@ In your main configuration file append the following _Input_ & _Output_ sections Topic fluent-bit Message_Key my_key ``` - diff --git a/pipeline/outputs/kafka.md b/pipeline/outputs/kafka.md index cfe3e4f75..4599b62da 100644 --- a/pipeline/outputs/kafka.md +++ b/pipeline/outputs/kafka.md @@ -6,7 +6,7 @@ Kafka output plugin allows to ingest your records into an [Apache Kafka](https:/ | Key | Description | default | | :--- | :--- | :--- | -| format | Specify data format, options available: json, msgpack. | json | +| format | Specify data format, options available: json, msgpack, raw. | json | | message\_key | Optional key to store the message | | | message\_key\_field | If set, the value of Message\_Key\_Field in the record will indicate the message key. If not set nor found in the record, Message\_Key will be used \(if set\). | | | timestamp\_key | Set the key to store the record timestamp | @timestamp | @@ -17,6 +17,8 @@ Kafka output plugin allows to ingest your records into an [Apache Kafka](https:/ | dynamic\_topic | adds unknown topics \(found in Topic\_Key\) to Topics. So in Topics only a default topic needs to be configured | Off | | queue\_full\_retries | Fluent Bit queues data into rdkafka library, if for some reason the underlying library cannot flush the records the queue might fills up blocking new addition of records. The `queue_full_retries` option set the number of local retries to enqueue the data. The default value is 10 times, the interval between each retry is 1 second. Setting the `queue_full_retries` value to `0` set's an unlimited number of retries. | 10 | | rdkafka.{property} | `{property}` can be any [librdkafka properties](https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md) | | +| raw\_log\_key | When using the raw format and set, the value of raw\_log\_key in the record will be send to kafka as the payload. | | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | > Setting `rdkafka.log.connection.close` to `false` and `rdkafka.request.required.acks` to 1 are examples of recommended settings of librdfkafka properties. @@ -114,3 +116,28 @@ specific avro schema. 
rdkafka.log_level 7 rdkafka.metadata.broker.list 192.168.1.3:9092 ``` + +#### Kafka Configuration File with Raw format + +This example Fluent Bit configuration file creates example records with the +_payloadkey_ and _msgkey_ keys. The _msgkey_ value is used as the Kafka message +key, and the _payloadkey_ value as the payload. + + +```text +[INPUT] + Name example + Tag example.data + Dummy {"payloadkey":"Data to send to kafka", "msgkey": "Key to use in the message"} + + +[OUTPUT] + Name kafka + Match * + Brokers 192.168.1.3:9092 + Topics test + Format raw + + Raw_Log_Key payloadkey + Message_Key_Field msgkey +``` diff --git a/pipeline/outputs/kinesis.md b/pipeline/outputs/kinesis.md index b21766678..e6c64183c 100644 --- a/pipeline/outputs/kinesis.md +++ b/pipeline/outputs/kinesis.md @@ -25,10 +25,12 @@ See [here](https://github.com/fluent/fluent-bit-docs/tree/43c4fe134611da471e706b | log\_key | By default, the whole log record will be sent to Kinesis. If you specify a key name with this option, then only the value of that key will be sent to Kinesis. For example, if you are using the Fluentd Docker log driver, you can specify `log_key log` and only the log message will be sent to Kinesis. | | role\_arn | ARN of an IAM role to assume \(for cross account access\). | | endpoint | Specify a custom endpoint for the Kinesis API. | +| port | TCP port of the Kinesis Streams service. Defaults to port `443`. | | sts\_endpoint | Custom endpoint for the STS API. | | auto\_retry\_requests | Immediately retry failed requests to AWS services once. This option does not affect the normal Fluent Bit retry mechanism with backoff. Instead, it enables an immediate retry with no delay for networking errors, which may help improve throughput when there are transient/random networking issues. This option defaults to `true`. | | external\_id | Specify an external ID for the STS API, can be used with the role_arn parameter if your role requires an external ID. | | profile | AWS profile name to use. Defaults to `default`. | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. Default: `1`. | ## Getting Started @@ -71,23 +73,6 @@ The following AWS IAM permissions are required to use this plugin: } ``` -### Worker support - -Fluent Bit 1.7 adds a new feature called `workers` which enables outputs to have dedicated threads. This `kinesis_streams` plugin fully supports workers. - -Example: - -```text -[OUTPUT] - Name kinesis_streams - Match * - region us-east-1 - stream my-stream - workers 2 -``` - -If you enable a single worker, you are enabling a dedicated thread for your Kinesis output. We recommend starting with without workers, evaluating the performance, and then adding workers one at a time until you reach your desired/needed throughput. For most users, no workers or a single worker will be sufficient. - ### AWS for Fluent Bit Amazon distributes a container image with Fluent Bit and these plugins. @@ -133,4 +118,3 @@ aws ssm get-parameters-by-path --path /aws/service/aws-for-fluent-bit/ ``` For more see [the AWS for Fluent Bit github repo](https://github.com/aws/aws-for-fluent-bit#public-images). 
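To show the `port` and `workers` parameters added to the table above in context, here is a minimal sketch. The region and stream name are placeholders reused from the removed worker example, not values required by the plugin:

```text
[OUTPUT]
    Name    kinesis_streams
    Match   *
    region  us-east-1
    stream  my-stream
    port    443
    workers 1
```

Setting `port` is only needed when the Kinesis Streams endpoint listens on something other than the default `443`, and `workers` can be raised above the default of `1` if a single flush thread becomes a bottleneck.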
- diff --git a/pipeline/outputs/logdna.md b/pipeline/outputs/logdna.md index 3416dff2a..e7432c2f3 100644 --- a/pipeline/outputs/logdna.md +++ b/pipeline/outputs/logdna.md @@ -32,6 +32,11 @@ Before to get started with the plugin configuration, make sure to obtain the pro <td style="text-align:left">LogDNA TCP Port</td> <td style="text-align:left">443</td> </tr> + <tr> + <td style="text-align:left">logdna_endpoint</td> + <td style="text-align:left">LogDNA ingestion endpoint</td> + <td style="text-align:left">/logs/ingest</td> + </tr> <tr> <td style="text-align:left">api_key</td> <td style="text-align:left">API key to get access to the service. This property is <b>mandatory</b>.</td> @@ -78,6 +83,11 @@ Before to get started with the plugin configuration, make sure to obtain the pro if not found, the default value is used.</td> <td style="text-align:left">Fluent Bit</td> </tr> + <tr> + <td style="text-align:left">workers</td> + <td style="text-align:left">The number of <a href="https://docs.fluentbit.io/manual/administration/multithreading#outputs">workers</a> to perform flush operations for this output.</td> + <td style="text-align:left">`0`</td> + </tr> </tbody> </table> @@ -150,4 +160,3 @@ Your record will be available and visible in your LogDNA dashboard after a few s In your LogDNA dashboard, go to the top filters and mark the Tags `aa` and `bb`, then you will be able to see your records as the example below:  - diff --git a/pipeline/outputs/loki.md b/pipeline/outputs/loki.md index ada685578..859c273a1 100644 --- a/pipeline/outputs/loki.md +++ b/pipeline/outputs/loki.md @@ -10,25 +10,29 @@ Be aware there is a separate Golang output plugin provided by [Grafana](https:// ## Configuration Parameters -| Key | Description | Default | -| :--- | :--- | :--- | -| host | Loki hostname or IP address. Do not include the subpath, i.e. `loki/api/v1/push`, but just the base hostname/URL. | 127.0.0.1 | -| uri | Specify a custom HTTP URI. It must start with forward slash.| /loki/api/v1/push | -| port | Loki TCP port | 3100 | -| tls | Use TLS authentication | off | -| http\_user | Set HTTP basic authentication user name | | -| http\_passwd | Set HTTP basic authentication password | | -| bearer\_token | Set bearer token authentication token value. | | -| tenant\_id | Tenant ID used by default to push logs to Loki. If omitted or empty it assumes Loki is running in single-tenant mode and no X-Scope-OrgID header is sent. | | -| labels | Stream labels for API request. It can be multiple comma separated of strings specifying `key=value` pairs. In addition to fixed parameters, it also allows to add custom record keys \(similar to `label_keys` property\). More details in the Labels section. | job=fluentbit | -| label\_keys | Optional list of record keys that will be placed as stream labels. This configuration property is for records key only. More details in the Labels section. | | -| label\_map\_path | Specify the label map file path. The file defines how to extract labels from each record. More details in the Labels section. | | -| remove\_keys | Optional list of keys to remove. | | -| drop\_single\_key | If set to true and after extracting labels only a single key remains, the log line sent to Loki will be the value of that key in line\_format. | off | -| line\_format | Format to use when flattening the record to a log line. Valid values are `json` or `key_value`. If set to `json`, the log line sent to Loki will be the Fluent Bit record dumped as JSON. 
If set to `key_value`, the log line will be each item in the record concatenated together \(separated by a single space\) in the format. | json | -| auto\_kubernetes\_labels | If set to true, it will add all Kubernetes labels to the Stream labels | off | -| tenant\_id\_key | Specify the name of the key from the original record that contains the Tenant ID. The value of the key is set as `X-Scope-OrgID` of HTTP header. It is useful to set Tenant ID dynamically. || -| compress | Set payload compression mechanism. The only available option is gzip. Default = "", which means no compression. || +| Key | Description | Default | +|:--------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------| +| host | Loki hostname or IP address. Do not include the subpath, i.e. `loki/api/v1/push`, but just the base hostname/URL. | 127.0.0.1 | +| uri | Specify a custom HTTP URI. It must start with forward slash. | /loki/api/v1/push | +| port | Loki TCP port | 3100 | +| tls | Use TLS authentication | off | +| http\_user | Set HTTP basic authentication user name | | +| http\_passwd | Set HTTP basic authentication password | | +| bearer\_token | Set bearer token authentication token value. | | +| header | Add additional arbitrary HTTP header key/value pair. Multiple headers can be set. | | +| tenant\_id | Tenant ID used by default to push logs to Loki. If omitted or empty it assumes Loki is running in single-tenant mode and no X-Scope-OrgID header is sent. | | +| labels | Stream labels for API request. It can be multiple comma separated of strings specifying `key=value` pairs. In addition to fixed parameters, it also allows to add custom record keys \(similar to `label_keys` property\). More details in the Labels section. | job=fluent-bit | +| label\_keys | Optional list of record keys that will be placed as stream labels. This configuration property is for records key only. More details in the Labels section. | | +| label\_map\_path | Specify the label map file path. The file defines how to extract labels from each record. More details in the Labels section. | | +| structured\_metadata | Optional comma-separated list of `key=value` strings specifying structured metadata for the log line. Like the `labels` parameter, values can reference record keys using record accessors. See [Structured metadata](#structured-metadata) for more information. | | +| structured\_metadata\_map\_keys | Optional comma-separated list of record key strings specifying record values of type map, used to dynamically populate structured metadata for the log line. Values can only reference record keys using record accessors, which should reference map values. Each entry from the referenced map will be used to add an entry to the structured metadata. See [Structured metadata](#structured-metadata) for more information. | | +| remove\_keys | Optional list of keys to remove. | | +| drop\_single\_key | If set to true and after extracting labels only a single key remains, the log line sent to Loki will be the value of that key in line\_format. If set to `raw` and the log line is a string, the log line will be sent unquoted. 
| off | +| line\_format | Format to use when flattening the record to a log line. Valid values are `json` or `key_value`. If set to `json`, the log line sent to Loki will be the Fluent Bit record dumped as JSON. If set to `key_value`, the log line will be each item in the record concatenated together \(separated by a single space\) in the format. | json | +| auto\_kubernetes\_labels | If set to true, it will add all Kubernetes labels to the Stream labels | off | +| tenant\_id\_key | Specify the name of the key from the original record that contains the Tenant ID. The value of the key is set as `X-Scope-OrgID` of HTTP header. It is useful to set Tenant ID dynamically. | | +| compress | Set payload compression mechanism. The only available option is gzip. Default = "", which means no compression. | | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ## Labels @@ -175,6 +179,138 @@ Based in the JSON example provided above, the internal stream labels will be: job="fluentbit", team="Santiago Wanderers" ``` +## Drop Single Key + +If there is only one key remaining after removing keys, you can use the `drop_single_key` property to send its value to Loki, rather than a single key=value pair. + +Consider this simple JSON example: + +```json +{"key":"value"} +``` + +If the value is a string, `line_format` is `json`, and `drop_single_key` is `true`, it will be sent as a quoted string. + +```python +[OUTPUT] + name loki + match * + drop_single_key on + line_format json +``` + +The outputted line would show in Loki as: + +```json +"value" +``` + +If `drop_single_key` is `raw`, or `line_format` is `key_value`, it will show in Loki as: + +```text +value +``` + +If you want both structured JSON and plain-text logs in Loki, you should set `drop_single_key` to `raw` and `line_format` to `json`. +Loki does not interpret a quoted string as valid JSON, and so to remove the quotes without `drop_single_key` set to raw, you would need to use a query like this: + +```C +{"job"="fluent-bit"} | regexp `^"?(?P<log>.*?)"?$` | line_format "{{.log}}" +``` + +If `drop_single_key` is `off`, it will show in Loki as: + +```json +{"key":"value"} +``` + +You can get the same behavior this flag provides in Loki with `drop_single_key` set to `off` with this query: + +```C +{"job"="fluent-bit"} | json | line_format "{{.log}}" +``` + +### Structured Metadata + +[Structured metadata](https://grafana.com/docs/loki/latest/get-started/labels/structured-metadata/) +lets you attach custom fields to individual log lines without embedding the +information in the content of the log line. This capability works well for high +cardinality data that isn't suited for using labels. While not a label, the +`structured_metadata` configuration parameter operates similarly to the `labels` +parameter. Both parameters are comma-delimited `key=value` lists, and both can use +record accessors to reference keys within the record being processed. + +The following configuration: + +- Defines fixed values for the cluster and region labels. +- Uses the record accessor pattern to set the namespace label to the namespace name as + determined by the Kubernetes metadata filter (not shown). +- Uses a structured metadata field to hold the Kubernetes pod name. 
+ +```python +[OUTPUT] + name loki + match * + labels cluster=my-k8s-cluster, region=us-east-1, namespace=$kubernetes['namespace_name'] + structured_metadata pod=$kubernetes['pod_name'] +``` + +Other common uses for structured metadata include trace and span IDs, process and thread IDs, and log levels. + +Structured metadata is officially supported starting with Loki 3.0, and shouldn't be used +with Loki deployments prior to Loki 3.0. + +#### Structured Metadata Maps +In addition to the `structured_metadata` configuration parameter, a `structured_metadata_map_keys` is available, which can be used to dynamically populate structured metadata from map values in the log record. `structured_metadata_map_keys` can be set with a list of record accessors, where each one should reference map values in the log record. Record accessors which do not match a map value will simply be skipped. + +The following configuration is similar to the above, except now all entries in the log record map value `$kubernetes` will be used as structured metadata entries. + +{% tabs %} +{% tab title="fluent-bit.conf" %} +```text +[OUTPUT] + name loki + match * + labels cluster=my-k8s-cluster, region=us-east-1 + structured_metadata_map_keys $kubernetes +``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml + outputs: + - name: loki + match: * + labels: cluster=my-k8s-cluster, region=us-east-1 + structured_metadata_map_keys: $kubernetes +``` +{% endtab %} +{% endtabs %} + +Assuming the value `$kubernetes` is a map containing two entries `namespace_name` and `pod_name`, the above configuration is equivalent to: + +{% tabs %} +{% tab title="fluent-bit.conf" %} +```text +[OUTPUT] + name loki + match * + labels cluster=my-k8s-cluster, region=us-east-1 + structured_metadata $kubernetes['namespace_name'],$kubernetes['pod_name'] +``` +{% endtab %} + +{% tab title="fluent-bit.yaml" %} +```yaml + outputs: + - name: loki + match: * + labels: cluster=my-k8s-cluster, region=us-east-1 + structured_metadata: $kubernetes['namespace_name'], $kubernetes['pod_name'] +``` +{% endtab %} +{% endtabs %} + ## Networking and TLS Configuration This plugin inherit core Fluent Bit features to customize the network behavior and optionally enable TLS in the communication channel. For more details about the specific options available refer to the following articles: @@ -251,4 +387,3 @@ Fluent Bit v1.7.0 [2020/10/14 20:57:46] [debug] [http] request payload (272 bytes) [2020/10/14 20:57:46] [ info] [output:loki:loki.0] 127.0.0.1:3100, HTTP status=204 ``` - diff --git a/pipeline/outputs/nats.md b/pipeline/outputs/nats.md index c2586e45a..10d17a004 100644 --- a/pipeline/outputs/nats.md +++ b/pipeline/outputs/nats.md @@ -2,12 +2,13 @@ The **nats** output plugin, allows to flush your records into a [NATS Server](https://docs.nats.io/nats-concepts/intro) end point. The following instructions assumes that you have a fully operational NATS Server in place. -In order to flush records, the **nats** plugin requires to know two parameters: +## Configuration parameters | parameter | description | default | | :--- | :--- | :--- | | host | IP address or hostname of the NATS Server | 127.0.0.1 | | port | TCP port of the target NATS Server | 4222 | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. 
| `0` | In order to override the default configuration values, the plugin uses the optional Fluent Bit network address format, e.g: @@ -64,4 +65,3 @@ Each record is an individual entity represented in a JSON array that contains a [1457108506,{"tag":"fluentbit","cpu_p":6.500000,"user_p":4.500000,"system_p":2}] ] ``` - diff --git a/pipeline/outputs/new-relic.md b/pipeline/outputs/new-relic.md index 29219f6c8..074acce00 100644 --- a/pipeline/outputs/new-relic.md +++ b/pipeline/outputs/new-relic.md @@ -72,6 +72,7 @@ Before to get started with the plugin configuration, make sure to obtain the pro </table> | compress | Set the compression mechanism for the payload. This option allows two values: `gzip` \(enabled by default\) or `false` to disable compression. | gzip | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | | :--- | :--- | :--- | @@ -117,4 +118,3 @@ Fluent Bit v1.5.0 [2020/04/10 10:58:35] [ info] [output:nrlogs:nrlogs.0] log-api.newrelic.com:443, HTTP status=202 {"requestId":"feb312fe-004e-b000-0000-0171650764ac"} ``` - diff --git a/pipeline/outputs/observe.md b/pipeline/outputs/observe.md index 2e722422e..47be2503f 100644 --- a/pipeline/outputs/observe.md +++ b/pipeline/outputs/observe.md @@ -2,7 +2,7 @@ Observe employs the **http** output plugin, allowing you to flush your records [into Observe](https://docs.observeinc.com/en/latest/content/data-ingestion/forwarders/fluentbit.html). -For now the functionality is pretty basic and it issues a POST request with the data records in [MessagePack](http://msgpack.org) (or JSON) format. +For now the functionality is pretty basic and it issues a POST request with the data records in [MessagePack](http://msgpack.org) (or JSON) format. The following are the specific HTTP parameters to employ: @@ -19,6 +19,7 @@ The following are the specific HTTP parameters to employ: | header | The specific header to instructs Observe how to decode incoming payloads | X-Observe-Decoder fluent | | compress | Set payload compression mechanism. Option available is 'gzip' | gzip | | tls.ca_file | **For use with Windows**: provide path to root cert | | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ### Configuration File @@ -41,5 +42,5 @@ In your main configuration file, append the following _Input_ & _Output_ section # For Windows: provide path to root cert #tls.ca_file C:\fluent-bit\isrgrootx1.pem - + ``` diff --git a/pipeline/outputs/oci-logging-analytics.md b/pipeline/outputs/oci-logging-analytics.md index 54abb039a..253b86f12 100644 --- a/pipeline/outputs/oci-logging-analytics.md +++ b/pipeline/outputs/oci-logging-analytics.md @@ -20,6 +20,7 @@ Following are the top level configuration properties of the plugin: | profile_name | OCI Config Profile Name to be used from the configuration file | DEFAULT | | namespace | OCI Tenancy Namespace in which the collected log data is to be uploaded | | | proxy | define proxy if required, in http://host:port format, supports only http protocol | | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `1` | The following parameters are to set the Logging Analytics resources that must be used to process your logs by OCI Logging Analytics. 
@@ -28,7 +29,7 @@ The following parameters are to set the Logging Analytics resources that must be | oci_config_in_record | If set to true, the following oci_la_* will be read from the record itself instead of the output plugin configuration. | false | | oci_la_log_group_id | The OCID of the Logging Analytics Log Group where the logs must be stored. This is a mandatory parameter | | | oci_la_log_source_name | The Logging Analytics Source that must be used to process the log records. This is a mandatory parameter | | -| oci_la_entity_id | The OCID of the Logging Analytics Entity | | +| oci_la_entity_id | The OCID of the Logging Analytics Entity | | | oci_la_entity_type | The entity type of the Logging Analytics Entity | | | oci_la_log_path | Specify the original location of the log files | | | oci_la_global_metadata | Use this parameter to specify additional global metadata along with original log content to Logging Analytics. The format is 'key_name value'. This option can be set multiple times | | @@ -36,7 +37,8 @@ The following parameters are to set the Logging Analytics resources that must be ## TLS/SSL -OCI Logging Analytics output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The OCI Logging Analytics output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). ## Getting Started @@ -86,11 +88,13 @@ In case of multiple inputs, where oci_la_* properties can differ, you can add th [INPUT] Name dummy Tag dummy + [Filter] Name modify Match * Add oci_la_log_source_name <LOG_SOURCE_NAME> Add oci_la_log_group_id <LOG_GROUP_OCID> + [Output] Name oracle_log_analytics Match * @@ -109,6 +113,7 @@ You can attach certain metadata to the log events collected from various inputs. [INPUT] Name dummy Tag dummy + [Output] Name oracle_log_analytics Match * @@ -138,12 +143,12 @@ The above configuration will generate a payload that looks like this "metadata": { "key1": "value1", "key2": "value2" - }, - "logSourceName": "example_log_source", - "logRecords": [ - "dummy" - ] - } + }, + "logSourceName": "example_log_source", + "logRecords": [ + "dummy" + ] + } ] } ``` @@ -156,11 +161,13 @@ With oci_config_in_record option set to true, the metadata key-value pairs will [INPUT] Name dummy Tag dummy + [FILTER] Name Modify Match * Add olgm.key1 val1 Add olgm.key2 val2 + [FILTER] Name nest Match * @@ -168,11 +175,13 @@ With oci_config_in_record option set to true, the metadata key-value pairs will Wildcard olgm.* Nest_under oci_la_global_metadata Remove_prefix olgm. + [Filter] Name modify Match * Add oci_la_log_source_name <LOG_SOURCE_NAME> Add oci_la_log_group_id <LOG_GROUP_OCID> + [Output] Name oracle_log_analytics Match * @@ -184,4 +193,4 @@ With oci_config_in_record option set to true, the metadata key-value pairs will tls.verify Off ``` -The above configuration first injects the necessary metadata keys and values in the record directly, with a prefix olgm. attached to the keys in order to segregate the metadata keys from rest of the record keys. Then, using a nest filter only the metadata keys are selected by the filter and nested under oci_la_global_metadata key in the record, and the prefix olgm. is removed from the metadata keys. 
\ No newline at end of file +The above configuration first injects the necessary metadata keys and values in the record directly, with a prefix olgm. attached to the keys in order to segregate the metadata keys from rest of the record keys. Then, using a nest filter only the metadata keys are selected by the filter and nested under oci_la_global_metadata key in the record, and the prefix olgm. is removed from the metadata keys. diff --git a/pipeline/outputs/openobserve.md b/pipeline/outputs/openobserve.md new file mode 100644 index 000000000..264bad156 --- /dev/null +++ b/pipeline/outputs/openobserve.md @@ -0,0 +1,2 @@ +# OpenObserve + diff --git a/pipeline/outputs/opensearch.md b/pipeline/outputs/opensearch.md index e238486e0..445504dd0 100644 --- a/pipeline/outputs/opensearch.md +++ b/pipeline/outputs/opensearch.md @@ -45,14 +45,15 @@ The following instructions assumes that you have a fully operational OpenSearch | Trace\_Error | When enabled print the OpenSearch API calls to stdout when OpenSearch returns an error \(for diag only\) | Off | | Current\_Time\_Index | Use current time for index generation instead of message record | Off | | Suppress\_Type\_Name | When enabled, mapping types is removed and `Type` option is ignored. | Off | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 2 | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | | Compress | Set payload compression mechanism. The only available option is `gzip`. Default = "", which means no compression. | | > The parameters _index_ and _type_ can be confusing if you are new to OpenSearch, if you have used a common relational database before, they can be compared to the _database_ and _table_ concepts. Also see [the FAQ below](opensearch.md#faq) ### TLS / SSL -OpenSearch output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The OpenSearch output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). ### write\_operation @@ -199,7 +200,7 @@ With data access permissions, IAM policies are not needed to access the collecti ### Issues with the OpenSearch cluster -Occasionally the Fluent Bit service may generate errors without any additional detail in the logs to explain the source of the issue, even with the service's log_level attribute set to [Debug](https://docs.fluentbit.io/manual/administration/configuring-fluent-bit/classic-mode/configuration-file). +Occasionally the Fluent Bit service may generate errors without any additional detail in the logs to explain the source of the issue, even with the service's log_level attribute set to [Debug](https://docs.fluentbit.io/manual/administration/configuring-fluent-bit/classic-mode/configuration-file). For example, in this scenario the logs show that a connection was successfully established with the OpenSearch domain, and yet an error is still returned: ``` @@ -218,9 +219,9 @@ This behavior could be indicative of a hard-to-detect issue with index shard usa While OpenSearch index shards and disk space are related, they are not directly tied to one another. -OpenSearch domains are limited to 1000 index shards per data node, regardless of the size of the nodes. 
And, importantly, shard usage is not proportional to disk usage: an individual index shard can hold anywhere from a few kilobytes to dozens of gigabytes of data. +OpenSearch domains are limited to 1000 index shards per data node, regardless of the size of the nodes. And, importantly, shard usage is not proportional to disk usage: an individual index shard can hold anywhere from a few kilobytes to dozens of gigabytes of data. -In other words, depending on the way index creation and shard allocation are configured in the OpenSearch domain, all of the available index shards could be used long before the data nodes run out of disk space and begin exhibiting disk-related performance issues (e.g. nodes crashing, data corruption, or the dashboard going offline). +In other words, depending on the way index creation and shard allocation are configured in the OpenSearch domain, all of the available index shards could be used long before the data nodes run out of disk space and begin exhibiting disk-related performance issues (e.g. nodes crashing, data corruption, or the dashboard going offline). The primary issue that arises when a domain is out of available index shards is that new indexes can no longer be created (though logs can still be added to existing indexes). @@ -231,7 +232,7 @@ When that happens, the Fluent Bit OpenSearch output may begin showing confusing If any of those symptoms are present, consider using the OpenSearch domain's API endpoints to troubleshoot possible shard issues. -Running this command will show both the shard count and disk usage on all of the nodes in the domain. +Running this command will show both the shard count and disk usage on all of the nodes in the domain. ``` GET _cat/allocation?v ``` diff --git a/pipeline/outputs/opentelemetry.md b/pipeline/outputs/opentelemetry.md index 6b70d56cf..4007955bd 100644 --- a/pipeline/outputs/opentelemetry.md +++ b/pipeline/outputs/opentelemetry.md @@ -15,13 +15,29 @@ Important Note: At the moment only HTTP endpoints are supported. | http_passwd | Basic Auth Password. Requires HTTP_user to be set | | | port | TCP port of the target HTTP Server | 80 | | proxy | Specify an HTTP Proxy. The expected format of this value is `http://HOST:PORT`. Note that HTTPS is **not** currently supported. It is recommended not to set this and to configure the [HTTP proxy environment variables](https://docs.fluentbit.io/manual/administration/http-proxy) instead as they support both HTTP and HTTPS. | | +| http2 | Defines whether HTTP/2 protocol is enabled. This setting also supports the `force` option, which forces HTTP/2 over a plaintext connection. | On | +| grpc | Enables gRPC over an HTTP/2 connection. This setting applies to HTTP/2 only. | off | | metrics_uri | Specify an optional HTTP URI for the target web server listening for metrics, e.g: /v1/metrics | / | | logs_uri | Specify an optional HTTP URI for the target web server listening for logs, e.g: /v1/logs | / | | traces_uri | Specify an optional HTTP URI for the target web server listening for traces, e.g: /v1/traces | / | | header | Add a HTTP header key/value pair. Multiple headers can be set. | | | log_response_payload | Log the response payload within the Fluent Bit log | false | +| logs_body_key | The log body key to look up in the log events body/message. Sets the Body field of the opentelemtry logs data model. | message | +| logs_trace_id_message_key | The trace id key to look up in the log events body/message. Sets the TraceId field of the opentelemtry logs data model. 
| traceId | +| logs_span_id_message_key | The span id key to look up in the log events body/message. Sets the SpanId field of the opentelemtry logs data model. | spanId | +| logs_severity_text_message_key | The severity text id key to look up in the log events body/message. Sets the SeverityText field of the opentelemtry logs data model. | severityText | +| logs_severity_number_message_key | The severity number id key to look up in the log events body/message. Sets the SeverityNumber field of the opentelemtry logs data model. | severityNumber | | add_label | This allows you to add custom labels to all metrics exposed through the OpenTelemetry exporter. You may have multiple of these fields | | | compress | Set payload compression mechanism. Option available is 'gzip' | | +| logs_observed_timestamp_metadata_key | Specify an ObservedTimestamp key to look up in the metadata. | $ObservedKey | +| logs_timestamp_metadata_key |Specify a Timestamp key to look up in the metadata. | $Timestamp | +| logs_severity_key_metadata_key |Specify a SeverityText key to look up in the metadata.| $SeverityText | +| logs_severity_number_metadata_key | Specify a SeverityNumber key to look up in the metadata.| $SeverityNumber | +| logs_trace_flags_metadata_key |Specify a Flags key to look up in the metadata.| $Flags | +| logs_span_id_metadata_key |Specify a SpanId key to look up in the metadata.| $SpanId | +| logs_trace_id_metadata_key |Specify a TraceId key to look up in the metadata.| $TraceId | +| logs_attributes_metadata_key |Specify an Attributes key to look up in the metadata.| $Attributes | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ## Getting Started @@ -62,6 +78,11 @@ The OpenTelemetry plugin works with logs and only the metrics collected from one Log_response_payload True Tls On Tls.verify Off + logs_body_key $message + logs_span_id_message_key span_id + logs_trace_id_message_key trace_id + logs_severity_text_message_key loglevel + logs_severity_number_message_key lognum # add user-defined labels add_label app fluent-bit add_label color blue diff --git a/pipeline/outputs/postgresql.md b/pipeline/outputs/postgresql.md index 6bb581ed8..16eac7ffc 100644 --- a/pipeline/outputs/postgresql.md +++ b/pipeline/outputs/postgresql.md @@ -62,6 +62,7 @@ Make sure that the `fluentbit` user can connect to the `fluentbit` database on t | `min_pool_size` | Minimum number of connection in async mode | 1 | | `max_pool_size` | Maximum amount of connections in async mode | 4 | | `cockroachdb` | Set to `true` if you will connect the plugin with a CockroachDB | false | +| `workers` | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. 
| `0` | ### Libpq @@ -129,4 +130,3 @@ Here follows a list of useful resources from the PostgreSQL documentation: * [libpq - Environment variables](https://www.postgresql.org/docs/current/libpq-envars.html) * [libpq - password file](https://www.postgresql.org/docs/current/libpq-pgpass.html) * [Trigger functions](https://www.postgresql.org/docs/current/plpgsql-trigger.html) - diff --git a/pipeline/outputs/prometheus-exporter.md b/pipeline/outputs/prometheus-exporter.md index 7db7c6d2d..feac59d76 100644 --- a/pipeline/outputs/prometheus-exporter.md +++ b/pipeline/outputs/prometheus-exporter.md @@ -4,7 +4,7 @@ description: An output plugin to expose Prometheus Metrics # Prometheus Exporter -The prometheus exporter allows you to take metrics from Fluent Bit and expose them such that a Prometheus instance can scrape them. +The prometheus exporter allows you to take metrics from Fluent Bit and expose them such that a Prometheus instance can scrape them. Important Note: The prometheus exporter only works with metric plugins, such as Node Exporter Metrics @@ -13,6 +13,7 @@ Important Note: The prometheus exporter only works with metric plugins, such as | host | This is address Fluent Bit will bind to when hosting prometheus metrics. Note: `listen` parameter is deprecated from v1.9.0. | 0.0.0.0 | | port | This is the port Fluent Bit will bind to when hosting prometheus metrics | 2021 | | add\_label | This allows you to add custom labels to all metrics exposed through the prometheus exporter. You may have multiple of these fields | | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `1` | ## Getting Started diff --git a/pipeline/outputs/prometheus-remote-write.md b/pipeline/outputs/prometheus-remote-write.md index 0d430457e..b866f7193 100644 --- a/pipeline/outputs/prometheus-remote-write.md +++ b/pipeline/outputs/prometheus-remote-write.md @@ -25,7 +25,7 @@ Important Note: The prometheus exporter only works with metric plugins, such as | header | Add a HTTP header key/value pair. Multiple headers can be set. | | | log_response_payload | Log the response payload within the Fluent Bit log | false | | add_label | This allows you to add custom labels to all metrics exposed through the prometheus exporter. You may have multiple of these fields | | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 2 | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. 
| `2` | ## Getting Started @@ -93,7 +93,7 @@ With Logz.io [hosted prometheus](https://logz.io/solutions/infrastructure-monito [OUTPUT] name prometheus_remote_write host listener.logz.io - port 8053 + port 8053 match * header Authorization Bearer <LOGZIO Key> tls on @@ -109,7 +109,7 @@ With [Coralogix Metrics](https://coralogix.com/platform/metrics/) you may need t [OUTPUT] name prometheus_remote_write host metrics-api.coralogix.com - uri prometheus/api/v1/write?appLabelName=path&subSystemLabelName=path&severityLabelName=severity + uri prometheus/api/v1/write?appLabelName=path&subSystemLabelName=path&severityLabelName=severity match * port 443 tls on @@ -133,3 +133,25 @@ With [Levitate](https://last9.io/levitate-tsdb), you must use the Levitate clust http_user <Levitate Cluster Username> http_passwd <Levitate Cluster Password> ``` + +### Add Prometheus like Labels + +Ordinary prometheus clients add some of the labels as below: + +``` +[OUTPUT] + Name prometheus_remote_write + Match your.metric + Host xxxxxxx.yyyyy.zzzz + Port 443 + Uri /api/v1/write + Header Authorization Bearer YOUR_LICENSE_KEY + Log_response_payload True + Tls On + Tls.verify On + # add user-defined labels + add_label instance ${HOSTNAME} + add_label job fluent-bit +``` + +`instance` label can be emulated with `add_label instance ${HOSTNAME}`. And other labels can be added with `add_label <key> <value>` setting. diff --git a/pipeline/outputs/s3.md b/pipeline/outputs/s3.md index 469123d87..e1346bd0a 100644 --- a/pipeline/outputs/s3.md +++ b/pipeline/outputs/s3.md @@ -1,105 +1,157 @@ --- -description: Send logs, data, metrics to Amazon S3 +description: Send logs, data, and metrics to Amazon S3 --- # Amazon S3 -.png>) +.png>) -The Amazon S3 output plugin allows you to ingest your records into the [S3](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/WhatIsCloudWatchLogs.html) cloud object store. +The Amazon S3 output plugin lets you ingest records into the +[S3](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/WhatIsCloudWatchLogs.html) +cloud object store. -The plugin can upload data to S3 using the [multipart upload API](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html) or using S3 [PutObject](https://docs.aws.amazon.com/AmazonS3/latest/API/API\_PutObject.html). Multipart is the default and is recommended; Fluent Bit will stream data in a series of 'parts'. This limits the amount of data it has to buffer on disk at any point in time. By default, every time 5 MiB of data have been received, a new 'part' will be uploaded. The plugin can create files up to gigabytes in size from many small chunks/parts using the multipart API. All aspects of the upload process are configurable using the configuration options. +The plugin can upload data to S3 using the +[multipart upload API](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html) +or [`PutObject`](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html). +Multipart is the default and is recommended. Fluent Bit will stream data in a series +of _parts_. This limits the amount of data buffered on disk at any point in time. +By default, every time 5 MiB of data have been received, a new part will be uploaded. +The plugin can create files up to gigabytes in size from many small chunks or parts +using the multipart API. All aspects of the upload process are configurable. -The plugin allows you to specify a maximum file size, and a timeout for uploads. 
A file will be created in S3 when the max size is reached, or the timeout is reached- whichever comes first. +The plugin lets you specify a maximum file size, and a timeout for uploads. A +file will be created in S3 when the maximum size or the timeout is reached, whichever +comes first. Records are stored in files in S3 as newline delimited JSON. -See [here](https://github.com/fluent/fluent-bit-docs/tree/43c4fe134611da471e706b0edb2f9acd7cdfdbc3/administration/aws-credentials.md) for details on how AWS credentials are fetched. +See [AWS +Credentials](https://github.com/fluent/fluent-bit-docs/tree/43c4fe134611da471e706b0edb2f9acd7cdfdbc3/administration/aws-credentials.md) +for details about fetching AWS credentials. -**NOTE**: _The_ [_Prometheus success/retry/error metrics values_](administration/monitoring.md) _outputted by Fluent Bit's built-in http server are meaningless for the S3 output_. This is because S3 has its own buffering and retry mechanisms. The Fluent Bit AWS S3 maintainers apologize for this feature gap; you can [track our progress fixing it on GitHub](https://github.com/fluent/fluent-bit/issues/6141). +{% hint style="info" %} +The [Prometheus success/retry/error metrics values](administration/monitoring.md) +output by the built-in HTTP server in Fluent Bit are meaningless for S3 output. S3 has +its own buffering and retry mechanisms. The Fluent Bit AWS S3 maintainers apologize +for this feature gap; you can [track issue progress on GitHub](https://github.com/fluent/fluent-bit/issues/6141). +{% endhint %} ## Configuration Parameters -| Key | Description | Default | -|----------------------------------| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------- | -| region | The AWS region of your S3 bucket | us-east-1 | -| bucket | S3 Bucket name | None | -| json\_date\_key | Specify the name of the time key in the output record. To disable the time key just set the value to `false`. | date | -| json\_date\_format | Specify the format of the date. Supported formats are _double_, _epoch_, _iso8601_ (eg: _2018-05-30T09:39:52.000681Z_) and _java\_sql\_timestamp_ (eg: _2018-05-30 09:39:52.000681_) | iso8601 | -| total\_file\_size | Specifies the size of files in S3. Minimum size is 1M. With `use_put_object On` the maximum size is 1G. With multipart upload mode, the maximum size is 50G. | 100M | -| upload\_chunk\_size | The size of each 'part' for multipart uploads. 
Max: 50M | 5,242,880 bytes | -| upload\_timeout | Whenever this amount of time has elapsed, Fluent Bit will complete an upload and create a new file in S3. For example, set this value to 60m and you will get a new file every hour. | 10m | -| store\_dir | Directory to locally buffer data before sending. When multipart uploads are used, data will only be buffered until the `upload_chunk_size` is reached. S3 will also store metadata about in progress multipart uploads in this directory; this allows pending uploads to be completed even if Fluent Bit stops and restarts. It will also store the current $INDEX value if enabled in the S3 key format so that the $INDEX can keep incrementing from its previous value after Fluent Bit restarts. | /tmp/fluent-bit/s3 | -| store\_dir\_limit\_size | The size of the limitation for disk usage in S3. Limit the amount of s3 buffers in the `store_dir` to limit disk usage. Note: Use `store_dir_limit_size` instead of `storage.total_limit_size` which can be used to other plugins, because S3 has its own buffering system. | 0, which means unlimited | -| s3\_key\_format | Format string for keys in S3. This option supports a UUID, strftime time formatters, a syntax for selecting parts of the Fluent log tag using a syntax inspired by the rewrite\_tag filter. Add $UUID in the format string to insert a random string. Add $INDEX in the format string to insert an integer that increments each upload. The $INDEX value will be saved in the store\_dir so that if Fluent Bit restarts the value will keep incrementing from the previous run. Add $TAG in the format string to insert the full log tag; add $TAG\[0] to insert the first part of the tag in the s3 key. The tag is split into “parts” using the characters specified with the `s3_key_format_tag_delimiters` option. Add extension directly after the last piece of the format string to insert a key suffix. If you want to specify a key suffix and you are in `use_put_object` mode, you must specify $UUID as well. More explanations can be found in the S3 Key Format explainer section further down in this document. See the in depth examples and tutorial in the documentation. Time in s3\_key is the timestamp of the first record in the S3 file. | /fluent-bit-logs/$TAG/%Y/%m/%d/%H/%M/%S | -| s3\_key\_format\_tag\_delimiters | A series of characters which will be used to split the tag into 'parts' for use with the s3\_key\_format option. See the in depth examples and tutorial in the documentation. | . | -| static\_file\_path | Disables behavior where UUID string is automatically appended to end of S3 key name when $UUID is not provided in s3\_key\_format. $UUID, time formatters, $TAG, and other dynamic key formatters all work as expected while this feature is set to true. | false | -| use\_put\_object | Use the S3 PutObject API, instead of the multipart upload API. When this option is on, key extension is only available when $UUID is specified in `s3_key_format`. If $UUID is not included, a random string will be appended at the end of the format string and the key extension cannot be customized in this case. | false | -| role\_arn | ARN of an IAM role to assume (ex. for cross account access). | None | -| endpoint | Custom endpoint for the S3 API. An endpoint can contain scheme and port. | None | -| sts\_endpoint | Custom endpoint for the STS API. | None | -| profile | Option to specify an AWS Profile for credentials. 
| default | -| canned\_acl | [Predefined Canned ACL policy](https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl) for S3 objects. | None | -| compression | Compression type for S3 objects. 'gzip' is currently the only supported value by default. If Apache Arrow support was enabled at compile time, you can also use 'arrow'. For gzip compression, the Content-Encoding HTTP Header will be set to 'gzip'. Gzip compression can be enabled when `use_put_object` is 'on' or 'off' (PutObject and Multipart). Arrow compression can only be enabled with `use_put_object On`. | None | -| content\_type | A standard MIME type for the S3 object; this will be set as the Content-Type HTTP header. | None | -| send\_content\_md5 | Send the Content-MD5 header with PutObject and UploadPart requests, as is required when Object Lock is enabled. | false | -| auto\_retry\_requests | Immediately retry failed requests to AWS services once. This option does not affect the normal Fluent Bit retry mechanism with backoff. Instead, it enables an immediate retry with no delay for networking errors, which may help improve throughput when there are transient/random networking issues. | true | -| log\_key | By default, the whole log record will be sent to S3. If you specify a key name with this option, then only the value of that key will be sent to S3. For example, if you are using Docker, you can specify log\_key log and only the log message will be sent to S3. | None | -| preserve\_data\_ordering | Normally, when an upload request fails, there is a high chance for the last received chunk to be swapped with a later chunk, resulting in data shuffling. This feature prevents this shuffling by using a queue logic for uploads. | true | -| storage\_class | Specify the [storage class](https://docs.aws.amazon.com/AmazonS3/latest/API/API\_PutObject.html#AmazonS3-PutObject-request-header-StorageClass) for S3 objects. If this option is not specified, objects will be stored with the default 'STANDARD' storage class. | None | -| retry\_limit | Integer value to set the maximum number of retries allowed. Note: this configuration is released since version 1.9.10 and 2.0.1. For previous version, the number of retries is 5 and is not configurable. | 1 | -| external\_id | Specify an external ID for the STS API, can be used with the role\_arn parameter if your role requires an external ID. | None | +| Key | Description | Default | +|--------------------| --------------------------------- | ----------- | +| `region` | The AWS region of your S3 bucket. | `us-east-1` | +| `bucket` | S3 Bucket name | _none_ | +| `json_date_key` | Specify the time key name in the output record. To disable the time key, set the value to `false`. | `date` | +| `json_date_format` | Specify the format of the date. Accepted values: `double`, `epoch`, `iso8601` (2018-05-30T09:39:52.000681Z), `_java_sql_timestamp_` (2018-05-30 09:39:52.000681). | `iso8601` | +| `total_file_size` | Specify file size in S3. Minimum size is `1M`. With `use_put_object On` the maximum size is `1G`. With multipart uploads, the maximum size is `50G`. | `100M` | +| `upload_chunk_size` | The size of each part for multipart uploads. Max: 50M | 5,242,880 bytes | +| `upload_timeout` | When this amount of time elapses, Fluent Bit uploads and creates a new file in S3. Set to `60m` to upload a new file every hour. | `10m`| +| `store_dir` | Directory to locally buffer data before sending. When using multipart uploads, data buffers until reaching the `upload_chunk_size`. 
S3 stores metadata about in-progress multipart uploads in this directory, allowing pending uploads to be completed if Fluent Bit stops and restarts. It also stores the current `$INDEX` value if enabled in the S3 key format, so the `$INDEX` keeps incrementing from its previous value after Fluent Bit restarts. | `/tmp/fluent-bit/s3` |
+| `store_dir_limit_size` | Size limit for disk usage of the S3 buffers in the `store_dir`. Use `store_dir_limit_size` instead of `storage.total_limit_size`, which can be used for other plugins, because S3 has its own buffering system. | `0` (unlimited) |
+| `s3_key_format` | Format string for keys in S3. This option supports a UUID, strftime time formatters, and a syntax for selecting parts of the Fluent log tag inspired by the `rewrite_tag` filter. Add `$UUID` in the format string to insert a random string. Add `$INDEX` in the format string to insert an integer that increments each upload. The `$INDEX` value is saved in the `store_dir`. Add `$TAG` in the format string to insert the full log tag. Add `$TAG[0]` to insert the first part of the tag in the S3 key. The tag is split into parts using the characters specified with the `s3_key_format_tag_delimiters` option. Add the extension directly after the last piece of the format string to insert a key suffix. To specify a key suffix in `use_put_object` mode, you must specify `$UUID`. See [S3 Key Format](#allowing-a-file-extension-in-the-s3-key-format-with-usduuid). Time in `s3_key` is the timestamp of the first record in the S3 file. | `/fluent-bit-logs/$TAG/%Y/%m/%d/%H/%M/%S` |
+| `s3_key_format_tag_delimiters` | A series of characters used to split the tag into parts for use with the `s3_key_format` option. | `.` |
+| `static_file_path` | Disables the behavior in which a UUID string is appended to the end of the S3 key name when `$UUID` isn't provided in `s3_key_format`. `$UUID`, time formatters, `$TAG`, and other dynamic key formatters all work as expected while this feature is set to `true`. | `false` |
+| `use_put_object` | Use the S3 `PutObject` API instead of the multipart upload API. When enabled, the key extension is only available when `$UUID` is specified in `s3_key_format`. If `$UUID` isn't included, a random string is appended to the end of the format string and the key extension can't be customized. | `false` |
+| `role_arn` | ARN of an IAM role to assume (for example, for cross-account access). | _none_ |
+| `endpoint` | Custom endpoint for the S3 API. Endpoints can contain scheme and port. | _none_ |
+| `sts_endpoint` | Custom endpoint for the STS API. | _none_ |
+| `profile` | Option to specify an AWS Profile for credentials. | `default` |
+| `canned_acl` | [Predefined Canned ACL policy](https://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl) for S3 objects. | _none_ |
+| `compression` | Compression type for S3 objects. `gzip` is currently the only supported value by default. If Apache Arrow support was enabled at compile time, you can use `arrow`. For gzip compression, the Content-Encoding HTTP Header will be set to `gzip`. Gzip compression can be enabled when `use_put_object` is `on` or `off` (`PutObject` and Multipart). Arrow compression can only be enabled with `use_put_object On`. | _none_ |
+| `content_type` | A standard MIME type for the S3 object, set as the Content-Type HTTP header. | _none_ |
+| `send_content_md5` | Send the Content-MD5 header with `PutObject` and UploadPart requests, as is required when Object Lock is enabled.
| `false` | +| `auto_retry_requests` | Immediately retry failed requests to AWS services once. This option doesn't affect the normal Fluent Bit retry mechanism with backoff. Instead, it enables an immediate retry with no delay for networking errors, which can help improve throughput during transient network issues. | `true` | +| `log_key` | By default, the whole log record will be sent to S3. When specifying a key name with this option, only the value of that key sends to S3. For example, when using Docker you can specify `log_key log` and only the log message sends to S3. | _none_ | +| `preserve_data_ordering` | When an upload request fails, the last received chunk might swap with a later chunk, resulting in data shuffling. This feature prevents shuffling by using a queue logic for uploads. | `true` | +| `storage_class` | Specify the [storage class](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html#AmazonS3-PutObject-request-header-StorageClass) for S3 objects. If this option isn't specified, objects store with the default `STANDARD` storage class. | _none_ | +| `retry_limit` | Integer value to set the maximum number of retries allowed. Requires versions 1.9.10 and 2.0.1 or later. For previous version, the number of retries is `5` and isn't configurable. | `1` | +| `external_id` | Specify an external ID for the STS API. Can be used with the `role_arn` parameter if your role requires an external ID. | _none_ | +| `workers` | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `1` | ## TLS / SSL -To skip TLS verification, set `tls.verify` as `false`. For more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +To skip TLS verification, set `tls.verify` as `false`. For more details about the +properties available and general configuration, refer to +[TLS/SSL](../../administration/transport-security.md). ## Permissions The plugin requires the following AWS IAM permissions: -``` +```text { - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Action": [ - "s3:PutObject" - ], - "Resource": "*" - }] + "Version": "2012-10-17", + "Statement": [{ + "Effect": "Allow", + "Action": [ + "s3:PutObject" + ], + "Resource": "*" + }] } ``` ## Differences between S3 and other Fluent Bit outputs -The s3 output plugin is special because its use case is to upload files of non-trivial size to an Amazon S3 bucket. This is in contrast to most other outputs which send many requests to upload data in batches of a few Megabytes or less. - -When Fluent Bit recieves logs, it stores them in chunks, either in memory or the filesystem depending on your settings. A chunk is usually around 2 MB in size. Fluent Bit sends the chunks in order to each output that matches their tag. Most outputs then send the chunk immediately to their destination. A chunk is sent to the output's "flush callback function", which must return one of `FLB_OK`, `FLB_RETRY`, or `FLB_ERROR`. Fluent Bit keeps count of the return values from each outputs "flush callback function"; these counters are the data source for Fluent Bit's error, retry, and success metrics available in prometheus format via its monitoring interface. - -The S3 output plugin is a Fluent Bit output plugin and thus it conforms to the Fluent Bit output plugin specification. However, since the S3 use case is to upload large files, generally much larger than 2 MB, its behavior is different. 
The S3 "flush callback function" simply buffers the incoming chunk to the filesystem, and returns an `FLB_OK`. _Consequently, the prometheus metrics available via the Fluent Bit http server are meaningless for S3._ In addition, the `storage.total_limit_size` parameter is not meaningful for S3 since it has its own buffering system in the `store_dir`. Instead, use `store_dir_limit_size`. Finally, *S3 always requires a write-able filesystem*; running Fluent Bit on a read-only filesystem will not work with the S3 output. - -S3 uploads are primarily initiated via the S3 "timer callback function", which runs separately from its "flush callback function". Because S3 has its own system of buffering and its own callback to upload data, the normal sequential data ordering of chunks provided by the Fluent Bit engine may be compromised. Consequently, S3 has the `presevere_data_ordering` option which will ensure data is uploaded in the original order it was collected by Fluent Bit. +The S3 output plugin is used to upload large files to an Amazon S3 bucket, while +most other outputs which send many requests to upload data in batches of a few +megabytes or less. + +When Fluent Bit receives logs, it stores them in chunks, either in memory or the +filesystem depending on your settings. Chunks are usually around 2 MB in size. +Fluent Bit sends chunks, in order, to each output that matches their tag. Most outputs +then send the chunk immediately to their destination. A chunk is sent to the output's +`flush` callback function, which must return one of `FLB_OK`, `FLB_RETRY`, or +`FLB_ERROR`. Fluent Bit keeps count of the return values from each output's +`flush` callback function. These counters are the data source for Fluent Bit error, retry, +and success metrics available in Prometheus format through its monitoring interface. + +The S3 output plugin conforms to the Fluent Bit output plugin specification. +Since S3's use case is to upload large files (over 2 MB), its behavior is different. +S3's `flush` callback function buffers the incoming chunk to the filesystem, and +returns an `FLB_OK`. This means Prometheus metrics available from the Fluent +Bit HTTP server are meaningless for S3. In addition, the `storage.total_limit_size` +parameter isn't meaningful for S3 since it has its own buffering system in the +`store_dir`. Instead, use `store_dir_limit_size`. S3 requires a writeable filesystem. +Running Fluent Bit on a read-only filesystem won't work with the S3 output. + +S3 uploads primarily initiate using the S3 +[`timer`](https://docs.aws.amazon.com/iotevents/latest/apireference/API_iotevents-data_Timer.html) +callback function, which runs separately from its `flush`. + +S3 has its own buffering system and its own callback to upload data, so the normal +sequential data ordering of chunks provided by the Fluent Bit engine can be +compromised. S3 has the `presevere_data_ordering` option which ensures data is +uploaded in the original order it was collected by Fluent Bit. ### Summary: Uniqueness in S3 Plugin -1. _The HTTP Monitoring interface output metrics are not meaningful for S3_: AWS understands that this is non-ideal; we have [opened an issue with a design](https://github.com/fluent/fluent-bit/issues/6141) that will allow S3 to manage its own output metrics. -2. _You must use `store_dir_limit_size` to limit the space on disk used by S3 buffer files_. -3. _The original ordering of data inputted to Fluent Bit may not be preserved unless you enable `preserve_data_ordering On`_. 
+- The HTTP Monitoring interface output metrics aren't meaningful for S3. AWS + understands that this is non-ideal. See the + [open issue and design](https://github.com/fluent/fluent-bit/issues/6141) + to allow S3 to manage its own output metrics. +- You must use `store_dir_limit_size` to limit the space on disk used by S3 buffer files. +- The original ordering of data inputted to Fluent Bit might not be preserved unless you enable +`preserve_data_ordering On`. ## S3 Key Format and Tag Delimiters -In Fluent Bit, all logs have an associated tag. The `s3_key_format` option lets you inject the tag into the s3 key using the following syntax: +In Fluent Bit, all logs have an associated tag. The `s3_key_format` option lets you +inject the tag into the S3 key using the following syntax: -* `$TAG` => the full tag -* `$TAG[n]` => the nth part of the tag (index starting at zero). This syntax is copied from the rewrite tag filter. By default, “parts” of the tag are separated with dots, but you can change this with `s3_key_format_tag_delimiters`. +- `$TAG`: The full tag. +- `$TAG[n]`: The nth part of the tag (index starting at zero). This syntax is copied + from the rewrite tag filter. By default, tag parts are separated with + dots, but you can change this with `s3_key_format_tag_delimiters`. -In the example below, assume the date is January 1st, 2020 00:00:00 and the tag associated with the logs in question is `my_app_name-logs.prod`. +In the following example, assume the date is `January 1st, 2020 00:00:00` and the tag +associated with the logs in question is `my_app_name-logs.prod`. -``` +```python [OUTPUT] - Name s3 - Match * + Name s3 + Match * bucket my-bucket region us-west-2 total_file_size 250M @@ -107,34 +159,49 @@ In the example below, assume the date is January 1st, 2020 00:00:00 and the tag s3_key_format_tag_delimiters .- ``` -With the delimiters as . and -, the tag will be split into parts as follows: +With the delimiters as `.` and `-`, the tag splits into parts as follows: -* `$TAG[0]` = my\_app\_name -* `$TAG[1]` = logs -* `$TAG[2]` = prod +- `$TAG[0]` = `my_app_name` +- `$TAG[1]` = `logs` +- `$TAG[2]` = `prod` -So the key in S3 will be `/prod/my_app_name/2020/01/01/00/00/00/bgdHN1NM.gz`. +The key in S3 will be `/prod/my_app_name/2020/01/01/00/00/00/bgdHN1NM.gz`. ### Allowing a file extension in the S3 Key Format with $UUID -The Fluent Bit S3 output was designed to ensure that previous uploads will never be over-written by a subsequent upload. Consequently, the `s3_key_format` supports time formatters, `$UUID`, and `$INDEX`. `$INDEX` is special because it is saved in the `store_dir`; if you restart Fluent Bit with the same disk, then it can continue incrementing the index from its last value in the previous run. +The Fluent Bit S3 output was designed to ensure that previous uploads will never be +overwritten by a subsequent upload. The `s3_key_format` supports time formatters, +`$UUID`, and `$INDEX`. `$INDEX` is special because it's saved in the `store_dir`. If +you restart Fluent Bit with the same disk, it can continue incrementing the +index from its last value in the previous run. -For files uploaded with the PutObject API, the S3 output requires that a unique random string be present in the S3 key. This is because many of the use cases for PutObject uploads involve a short time period between uploads such that a timestamp in the S3 key may not be unique enough between uploads. 
For example, if you only specify minute granularity timestamps in the S3 key, with a small upload size, it is possible to have two uploads that have timestamps set in the same minute. This "requirement" can be disabled with `static_file_path On`. +For files uploaded with the `PutObject` API, the S3 output requires that a unique +random string be present in the S3 key. Many of the use cases for +`PutObject` uploads involve a short time period between uploads, so a timestamp +in the S3 key might not be unique enough between uploads. For example, if you only +specify minute granularity timestamps in the S3 key, with a small upload size, it's +possible to have two uploads that have timestamps set in the same minute. This +requirement can be disabled with `static_file_path On`. -There are three cases where the PutObject API is used: +The `PutObject` API is used in these cases: -1. When you explicitly set `use_put_object On` -2. On startup when the S3 output finds old buffer files in the `store_dir` from a previous run and attempts to send all of them at once. -3. On shutdown, when to prevent data loss the S3 output attempts to send all currently buffered data at once. +- When you explicitly set `use_put_object On`. +- On startup when the S3 output finds old buffer files in the `store_dir` from + a previous run and attempts to send all of them at once. +- On shutdown. To prevent data loss the S3 output attempts to send all currently + buffered data at once. -Consequently, you should always specify `$UUID` somewhere in your S3 key format. Otherwise, if the PutObject API is used, S3 will append a random 8 character UUID to the end of your S3 key. This means that a file extension set at the end of an S3 key will have the random UUID appended to it. This behavior can be disabled with `static_file_path On`. +You should always specify `$UUID` somewhere in your S3 key format. Otherwise, if the +`PutObject` API is used, S3 appends a random eight-character UUID to the end of your +S3 key. This means that a file extension set at the end of an S3 key will have the +random UUID appended to it. Disabled this with `static_file_path On`. -Let's walk through this via an example. First case, we attempt to set a `.gz` extension without specifying `$UUID`. +This example attempts to set a `.gz` extension without specifying `$UUID`: -``` +```python [OUTPUT] - Name s3 - Match * + Name s3 + Match * bucket my-bucket region us-west-2 total_file_size 50M @@ -143,189 +210,259 @@ Let's walk through this via an example. First case, we attempt to set a `.gz` ex s3_key_format /$TAG/%Y/%m/%d/%H_%M_%S.gz ``` -In the case where pending data is uploaded on shutdown, if the tag was `app`, the S3 key in the S3 bucket might be: +In the case where pending data is uploaded on shutdown, if the tag was `app`, the S3 +key in the S3 bucket might be: -``` +```text /app/2022/12/25/00_00_00.gz-apwgylqg ``` -The S3 output appended a random string to the "extension", since this upload on shutdown used the PutObject API. - -There are two ways of disabling this behavior. 
Option 1, use `static_file_path`: - -``` -[OUTPUT] - Name s3 - Match * - bucket my-bucket - region us-west-2 - total_file_size 50M - use_put_object Off - compression gzip - s3_key_format /$TAG/%Y/%m/%d/%H_%M_%S.gz - static_file_path On -``` - -Option 2, explicitly define where the random UUID will go in the S3 key format: - -``` -[OUTPUT] - Name s3 - Match * - bucket my-bucket - region us-west-2 - total_file_size 50M - use_put_object Off - compression gzip - s3_key_format /$TAG/%Y/%m/%d/%H_%M_%S/$UUID.gz -``` +The S3 output appended a random string to the file extension, since this upload +on shutdown used the `PutObject` API. + +There are two ways of disabling this behavior: + +- Use `static_file_path`: + + ```python + [OUTPUT] + Name s3 + Match * + bucket my-bucket + region us-west-2 + total_file_size 50M + use_put_object Off + compression gzip + s3_key_format /$TAG/%Y/%m/%d/%H_%M_%S.gz + static_file_path On + ``` + +- Explicitly define where the random UUID will go in the S3 key format: + + ```python + [OUTPUT] + Name s3 + Match * + bucket my-bucket + region us-west-2 + total_file_size 50M + use_put_object Off + compression gzip + s3_key_format /$TAG/%Y/%m/%d/%H_%M_%S/$UUID.gz + ``` ## Reliability -The `store_dir` is used to temporarily store data before it is uploaded. If Fluent Bit is stopped suddenly it will try to send all data and complete all uploads before it shuts down. If it can not send some data, on restart it will look in the `store_dir` for existing data and will try to send it. - -Multipart uploads are ideal for most use cases because they allow the plugin to upload data in small chunks over time. For example, 1 GB file can be created from 200 5MB chunks. While the file size in S3 will be 1 GB, only 5 MB will be buffered on disk at any one point in time. - -There is one minor drawback to multipart uploads- the file and data will not be visible in S3 until the upload is completed with a [CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API\_CompleteMultipartUpload.html) call. The plugin will attempt to make this call whenever Fluent Bit is shut down to ensure your data is available in s3. It will also store metadata about each upload in the `store_dir`, ensuring that uploads can be completed when Fluent Bit restarts (assuming it has access to persistent disk and the `store_dir` files will still be present on restart). +The `store_dir` is used to temporarily store data before upload. If Fluent Bit +stops suddenly, it will try to send all data and complete all uploads before it +shuts down. If it can not send some data, on restart it will look in the `store_dir` +for existing data and try to send it. + +Multipart uploads are ideal for most use cases because they allow the plugin to +upload data in small chunks over time. For example, 1 GB file can be created +from 200 5 MB chunks. While the file size in S3 will be 1 GB, only +5 MB will be buffered on disk at any one point in time. + +One drawback to multipart uploads is that the file and data aren't visible in S3 +until the upload is completed with a +[CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html) +call. The plugin attempts to make this call whenever Fluent Bit is shut down to +ensure your data is available in S3. It also stores metadata about each upload in +the `store_dir`, ensuring that uploads can be completed when Fluent Bit restarts +(assuming it has access to persistent disk and the `store_dir` files will still be +present on restart). 
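+
+As an illustration (the sizes and the path shown are placeholders, not
+recommendations), a multipart setup that buffers to a persistent `store_dir`,
+so interrupted uploads can be completed after a restart, might look like:
+
+```python
+[OUTPUT]
+    Name                 s3
+    Match                *
+    bucket               your-bucket
+    region               us-east-1
+    store_dir            /var/fluent-bit/s3
+    store_dir_limit_size 512M
+    total_file_size      1G
+    upload_chunk_size    50M
+    upload_timeout       10m
+```
+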
### Using S3 without persisted disk -If you run Fluent Bit in an environment without persistent disk, or without the ability to restart Fluent Bit and give it access to the data stored in the `store_dir` from previous executions- some considerations apply. This might occur if you run Fluent Bit on [AWS Fargate](https://aws.amazon.com/fargate/). +If you run Fluent Bit in an environment without persistent disk, or without the +ability to restart Fluent Bit and give it access to the data stored in the +`store_dir` from previous executions, some considerations apply. This might occur if +you run Fluent Bit on [AWS Fargate](https://aws.amazon.com/fargate/). -In these situations, we recommend using the PutObject API, and sending data frequently, to avoid local buffering as much as possible. This will limit data loss in the event Fluent Bit is killed unexpectedly. +In these situations, Fluent Bits recommend using the `PutObject` API and sending data +frequently, to avoid local buffering as much as possible. This will limit data loss +in the event Fluent Bit is killed unexpectedly. The following settings are recommended for this use case: -``` +```python [OUTPUT] - Name s3 - Match * - bucket your-bucket - region us-east-1 - total_file_size 1M - upload_timeout 1m - use_put_object On + Name s3 + Match * + bucket your-bucket + region us-east-1 + total_file_size 1M + upload_timeout 1m + use_put_object On ``` ## S3 Multipart Uploads -With `use_put_object Off` (default), S3 will attempt to send files using multipart uploads. For each file, S3 first calls [CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html), then a series of calls to [UploadPart](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html) for each fragment (targeted to be `upload_chunk_size` bytes), and finally [CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html) to create the final file in S3. - -### Fallback to PutObject - -S3 [requires](https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html) each [UploadPart](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html) fragment to be at least 5,242,880 bytes, otherwise the upload is rejected. - -Consequently, the S3 output must sometimes fallback to the [PutObject API](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html). - -Uploads are triggered by three settings: -1. `total_file_size` and `upload_chunk_size`: When S3 has buffered data in the `store_dir` that meets the desired `total_file_size` (for `use_put_object On`) or the `upload_chunk_size` (for Multipart), it will trigger an upload operation. -2. `upload_timeout`: Whenever locally buffered data has been present on the filesystem in the `store_dir` longer than the configured `upload_timeout`, it will be sent. This happens regardless of whether or not the desired byte size has been reached. Consequently, if you configure a small `upload_timeout`, your files may be smaller than the `total_file_size`. The timeout is evaluated against the time at which S3 started buffering data for each unqiue tag (that is, the time when new data was buffered for the unique tag after the last upload). The timeout is also evaluated against the [CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html) time, so a multipart upload will be completed after `upload_timeout` has elapsed, even if the desired size has not yet been reached. 
- -If your `upload_timeout` triggers an upload before the pending buffered data reaches the `upload_chunk_size`, it may be too small for a multipart upload. S3 will consequently fallback to use the [PutObject API](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html). - -When you enable compression, S3 applies the compression algorithm at send time. The size settings noted above trigger uploads based on the size of buffered data, not the final compressed size. Consequently, it is possible that after compression, buffered data no longer meets the required minimum S3 [UploadPart](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html) size. If this occurs, you will see a log message like: - - -``` -[ info] [output:s3:s3.0] Pre-compression upload_chunk_size= 5630650, After compression, chunk is only 1063320 bytes, the chunk was too small, using PutObject to upload -``` - -If you encounter this frequently, use the numbers in the messages to guess your compression factor. For example, in this case, the buffered data was reduced from 5,630,650 bytes to 1,063,320 bytes. The compressed size is 1/5 the actual data size, so configuring `upload_chunk_size 30M` should ensure each part is large enough after compression to be over the min required part size of 5,242,880 bytes. - -The S3 API allows the last part in an upload to be less than the 5,242,880 byte minimum. Therefore, if a part is too small for an existing upload, the S3 output will upload that part and then complete the upload. - -### upload_timeout constrains total multipart upload time for a single file - -The `upload_timeout` is evaluated against the [CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html) time. So a multipart upload will be completed after `upload_timeout` has elapsed, even if the desired size has not yet been reached. +With `use_put_object Off` (default), S3 will attempt to send files using multipart +uploads. For each file, S3 first calls +[CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html), +then a series of calls to +[UploadPart](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html) for +each fragment (targeted to be `upload_chunk_size` bytes), and finally +[CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html) +to create the final file in S3. + +### Fallback to `PutObject` + +S3 [requires](https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html) each +[UploadPart](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html) +fragment to be at least 5,242,880 bytes, otherwise the upload is rejected. + +The S3 output must sometimes fallback to the [`PutObject` +API](https://docs.aws.amazon.com/AmazonS3/latest/API/API_`PutObject`.html). + +Uploads are triggered by these settings: + +- `total_file_size` and `upload_chunk_size`: When S3 has buffered data in the + `store_dir` that meets the desired `total_file_size` (for `use_put_object On`) or + the `upload_chunk_size` (for Multipart), it will trigger an upload operation. +- `upload_timeout`: Whenever locally buffered data has been present on the filesystem + in the `store_dir` longer than the configured `upload_timeout`, it will be sent + even when the desired byte size hasn't been reached. + If you configure a small `upload_timeout`, your files can be smaller + than the `total_file_size`. 
The timeout is evaluated against the time at which S3 + started buffering data for each unique tag (that is, the time when new data was + buffered for the unique tag after the last upload). The timeout is also evaluated + against the + [CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html) + time, so a multipart upload will be completed after `upload_timeout` has elapsed, + even if the desired size hasn't yet been reached. + +If your `upload_timeout` triggers an upload before the pending buffered data reaches +the `upload_chunk_size`, it might be too small for a multipart upload. S3 will +fallback to use the [`PutObject` API](https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html). + +When you enable compression, S3 applies the compression algorithm at send time. The +size settings trigger uploads based on the size of buffered data, not the +final compressed size. It's possible that after compression, buffered data no longer +meets the required minimum S3 +[UploadPart](https://docs.aws.amazon.com/AmazonS3/latest/API/API_UploadPart.html) +size. If this occurs, you will see a log message like: + +```text +[ info] [output:s3:s3.0] Pre-compression upload_chunk_size= 5630650, After +compression, chunk is only 1063320 bytes, the chunk was too small, using PutObject to upload +``` + +If you encounter this frequently, use the numbers in the messages to guess your +compression factor. In this example, the buffered data was reduced from +5,630,650 bytes to 1,063,320 bytes. The compressed size is one-fifth the actual data size. +Configuring `upload_chunk_size 30M` should ensure each part is large enough after +compression to be over the minimum required part size of 5,242,880 bytes. + +The S3 API allows the last part in an upload to be less than the 5,242,880 byte +minimum. If a part is too small for an existing upload, the S3 output will +upload that part and then complete the upload. + +### `upload_timeout` constrains total multipart upload time for a single file + +The `upload_timeout` evaluated against the +[CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html) +time. A multipart upload will be completed after `upload_timeout` elapses, even if +the desired size has not yet been reached. ### Completing uploads -When [CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html) is called, an `UploadID` is returned. S3 stores these IDs for active uploads in the `store_dir`. Until [CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html) is called, the uploaded data will not be visible in S3. +When +[CreateMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html) +is called, an `UploadID` is returned. S3 stores these IDs for active uploads in the +`store_dir`. Until +[CompleteMultipartUpload](https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html) +is called, the uploaded data isn't visible in S3. + +On shutdown, S3 output attempts to complete all pending uploads. If an upload fails +to complete, the ID remains buffered in the `store_dir` in a directory called +`multipart_upload_metadata`. If you restart the S3 output with the same `store_dir` +it will discover the old UploadIDs and complete the pending uploads. 
The [S3 +documentation](https://aws.amazon.com/blogs/aws-cloud-financial-management/discovering-and-deleting-incomplete-multipart-uploads-to-lower-amazon-s3-costs/) +has suggestions on discovering and deleting or completing dangling uploads in your +buckets. -On shutdown, S3 output will attempt to complete all pending uploads. If it fails to complete an upload, the ID will remain buffered in the `store_dir` in a directory called `multipart_upload_metadata`. If you restart the S3 output with the same `store_dir` it will discover the old UploadIDs and complete the pending uploads. The [S3 documentation](https://aws.amazon.com/blogs/aws-cloud-financial-management/discovering-and-deleting-incomplete-multipart-uploads-to-lower-amazon-s3-costs/) also has suggestions on discovering and deleting/completing dangling uploads in your buckets. +## Usage with MinIO -## Worker support +[MinIO](https://min.io/) is a high-performance, S3 compatible object storage and you +can build your app with S3 functionality without S3. -Fluent Bit 1.7 adds a new feature called `workers` which enables outputs to have dedicated threads. This `s3` plugin has partial support for workers. **The plugin can only support a single worker; enabling multiple workers will lead to errors/indeterminate behavior.** +The following example runs [a MinIO server](https://docs.min.io/docs/minio-quickstart-guide.html) +at `localhost:9000`, and create a bucket of `your-bucket`. Example: -``` +```python [OUTPUT] - Name s3 - Match * - bucket your-bucket - region us-east-1 - total_file_size 1M - upload_timeout 1m - use_put_object On - workers 1 + Name s3 + Match * + bucket your-bucket + endpoint http://localhost:9000 ``` -If you enable a single worker, you are enabling a dedicated thread for your S3 output. We recommend starting without workers, evaluating the performance, and then enabling a worker if needed. For most users, the plugin can provide sufficient throughput without workers. +The records store in the MinIO server. -## Usage with MinIO +## Usage with Google Cloud -[MinIO](https://min.io/) is a high-performance, S3 compatible object storage and you can build your app with S3 functionality without S3. - -Assume you run [a MinIO server](https://docs.min.io/docs/minio-quickstart-guide.html) at localhost:9000, and create a bucket of `your-bucket` by referring [the client docs](https://docs.min.io/docs/minio-client-quickstart-guide). +You can send your S3 output to Google. You must generate HMAC keys on GCS and use +those keys for `access-key` and `access-secret`. Example: -``` +```python [OUTPUT] - Name s3 - Match * - bucket your-bucket - endpoint http://localhost:9000 + Name s3 + Match * + bucket your-bucket + endpoint https://storage.googleapis.com ``` -Then, the records will be stored into the MinIO server. +## Get Started -## Getting Started - -In order to send records into Amazon S3, you can run the plugin from the command line or through the configuration file. +To send records into Amazon S3, you can run the plugin from the command line or +through the configuration file. 
### Command Line -The **s3** plugin, can read the parameters from the command line through the **-p** argument (property), e.g: +The S3 plugin reads parameters from the command line through the `-p` argument: -``` -$ fluent-bit -i cpu -o s3 -p bucket=my-bucket -p region=us-west-2 -p -m '*' -f 1 +```text +fluent-bit -i cpu -o s3 -p bucket=my-bucket -p region=us-west-2 -p -m '*' -f 1 ``` ### Configuration File -In your main configuration file append the following _Output_ section: +In your main configuration file append the following `Output` section: -``` +```python [OUTPUT] - Name s3 - Match * - bucket your-bucket - region us-east-1 - store_dir /home/ec2-user/buffer - total_file_size 50M - upload_timeout 10m + Name s3 + Match * + bucket your-bucket + region us-east-1 + store_dir /home/ec2-user/buffer + total_file_size 50M + upload_timeout 10m ``` -An example that using PutObject instead of multipart: +An example using `PutObject` instead of multipart: -``` +```python [OUTPUT] - Name s3 - Match * - bucket your-bucket - region us-east-1 - store_dir /home/ec2-user/buffer - use_put_object On - total_file_size 10M - upload_timeout 10m + Name s3 + Match * + bucket your-bucket + region us-east-1 + store_dir /home/ec2-user/buffer + use_put_object On + total_file_size 10M + upload_timeout 10m ``` ## AWS for Fluent Bit -Amazon distributes a container image with Fluent Bit and this plugins. +Amazon distributes a container image with Fluent Bit and plugins. ### GitHub @@ -333,76 +470,91 @@ Amazon distributes a container image with Fluent Bit and this plugins. ### Amazon ECR Public Gallery -[aws-for-fluent-bit](https://gallery.ecr.aws/aws-observability/aws-for-fluent-bit) +Images are available in the Amazon ECR Public Gallery as +[aws-for-fluent-bit](https://gallery.ecr.aws/aws-observability/aws-for-fluent-bit). -Our images are available in Amazon ECR Public Gallery. You can download images with different tags by following command: +You can download images with different tags using the following command: -``` +```text docker pull public.ecr.aws/aws-observability/aws-for-fluent-bit:<tag> ``` -For example, you can pull the image with latest version by: +For example, you can pull the image with latest version with: -``` +```text docker pull public.ecr.aws/aws-observability/aws-for-fluent-bit:latest ``` -If you see errors for image pull limits, try log into public ECR with your AWS credentials: +If you see errors for image pull limits, try signing in to public ECR with your +AWS credentials: -``` +```text aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws ``` -You can check the [Amazon ECR Public official doc](https://docs.aws.amazon.com/AmazonECR/latest/public/get-set-up-for-amazon-ecr.html) for more details. +See the +[Amazon ECR Public official documentation](https://docs.aws.amazon.com/AmazonECR/latest/public/get-set-up-for-amazon-ecr.html) +for more details. ### Docker Hub [amazon/aws-for-fluent-bit](https://hub.docker.com/r/amazon/aws-for-fluent-bit/tags) +is also available from the Docker Hub. ### Amazon ECR -You can use our SSM Public Parameters to find the Amazon ECR image URI in your region: +Use Fluent Bit SSM Public Parameters to find the Amazon ECR image URI in your region: -``` +```text aws ssm get-parameters-by-path --path /aws/service/aws-for-fluent-bit/ ``` -For more see [the AWS for Fluent Bit github repo](https://github.com/aws/aws-for-fluent-bit#public-images). 
+For more information, see the +[AWS for Fluent Bit GitHub repository](https://github.com/aws/aws-for-fluent-bit#public-images). ## Advanced usage ### Use Apache Arrow for in-memory data processing -Starting from Fluent Bit v1.8, the Amazon S3 plugin includes the support for [Apache Arrow](https://arrow.apache.org/). The support is currently not enabled by default, as it depends on a shared version of `libarrow` as the prerequisite. +With Fluent Bit v1.8 or greater, the Amazon S3 plugin includes the support for +[Apache Arrow](https://arrow.apache.org/). Support isn't enabled by +default, and has a dependency on a shared version of `libarrow`. -To use this feature, `FLB_ARROW` must be turned on at compile time: +To use this feature, `FLB_ARROW` must be turned on at compile time. Use the following +commands: -``` -$ cd build/ -$ cmake -DFLB_ARROW=On .. -$ cmake --build . +```text +cd build/ +cmake -DFLB_ARROW=On .. +cmake --build . ``` -Once compiled, Fluent Bit can upload incoming data to S3 in Apache Arrow format. For example: +After being compiled, Fluent Bit can upload incoming data to S3 in Apache Arrow format. -``` +For example: + +```python [INPUT] - Name cpu + Name cpu [OUTPUT] - Name s3 - Bucket your-bucket-name - total_file_size 1M - use_put_object On - upload_timeout 60s - Compression arrow + Name s3 + Bucket your-bucket-name + total_file_size 1M + use_put_object On + upload_timeout 60s + Compression arrow ``` -As shown in this example, setting `Compression` to `arrow` makes Fluent Bit to convert payload into Apache Arrow format. +Setting `Compression` to `arrow` makes Fluent Bit convert payload into Apache Arrow +format. -The stored data is very easy to load, analyze and process using popular data processing tools (such as Python pandas, Apache Spark and Tensorflow). The following code uses `pyarrow` to analyze the uploaded data: +Load, analyze, and process stored data using popular data +processing tools such as Python pandas, Apache Spark and Tensorflow. -``` +The following example uses `pyarrow` to analyze the uploaded data: + +```text >>> import pyarrow.feather as feather >>> import pyarrow.fs as fs >>> @@ -410,7 +562,7 @@ The stored data is very easy to load, analyze and process using popular data pro >>> file = s3.open_input_file("my-bucket/fluent-bit-logs/cpu.0/2021/04/27/09/36/15-object969o67ZF") >>> df = feather.read_feather(file) >>> print(df.head()) - date cpu_p user_p system_p cpu0.p_cpu cpu0.p_user cpu0.p_system + date cpu_p user_p system_p cpu0.p_cpu cpu0.p_user cpu0.p_system 0 2021-04-27T09:33:53.539346Z 1.0 1.0 0.0 1.0 1.0 0.0 1 2021-04-27T09:33:54.539330Z 0.0 0.0 0.0 0.0 0.0 0.0 2 2021-04-27T09:33:55.539305Z 1.0 0.0 1.0 1.0 0.0 1.0 diff --git a/pipeline/outputs/skywalking.md b/pipeline/outputs/skywalking.md index 9919567a5..1d0f42925 100644 --- a/pipeline/outputs/skywalking.md +++ b/pipeline/outputs/skywalking.md @@ -11,10 +11,12 @@ The **Apache SkyWalking** output plugin, allows to flush your records to a [Apac | auth_token | Authentication token if needed for Apache SkyWalking OAP | None | | svc_name | Service name that fluent-bit belongs to | sw-service | | svc_inst_name | Service instance name of fluent-bit | fluent-bit | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. 
| `0` | ### TLS / SSL -Apache SkyWalking output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The Apache SkyWalking output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). ## Getting Started @@ -57,6 +59,6 @@ This message is packed into the following protocol format and written to the OAP "json": { "json": "{\"log\": \"This is the original log message\"}" } - } + } }] ``` diff --git a/pipeline/outputs/slack.md b/pipeline/outputs/slack.md index 0ef7d9d9d..5cbee7f03 100644 --- a/pipeline/outputs/slack.md +++ b/pipeline/outputs/slack.md @@ -17,6 +17,7 @@ Once you have obtained the Webhook address you can place it in the configuration | Key | Description | Default | | :--- | :--- | :--- | | webhook | Absolute address of the Webhook provided by Slack | | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ### Configuration File @@ -28,4 +29,3 @@ Get started quickly with this configuration file: match * webhook https://hooks.slack.com/services/T00000000/B00000000/XXXXXXXXXXXXXXXXXXXXXXXX ``` - diff --git a/pipeline/outputs/splunk.md b/pipeline/outputs/splunk.md index 6b18babec..5c752409a 100644 --- a/pipeline/outputs/splunk.md +++ b/pipeline/outputs/splunk.md @@ -23,7 +23,7 @@ Connectivity, transport and authentication configuration properties: | compress | Set payload compression mechanism. The only available option is `gzip`. | | | channel | Specify X-Splunk-Request-Channel Header for the HTTP Event Collector interface. | | | http_debug_bad_request | If the HTTP server response code is 400 (bad request) and this flag is enabled, it will print the full HTTP request and response to the stdout interface. This feature is available for debugging purposes. | | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 2 | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `2` | Content and Splunk metadata \(fields\) handling configuration properties: @@ -41,7 +41,8 @@ Content and Splunk metadata \(fields\) handling configuration properties: ### TLS / SSL -Splunk output plugin supports TLS/SSL, for more details about the properties available and general configuration, please refer to the [TLS/SSL](../../administration/transport-security.md) section. +The Splunk output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). 
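+
+For example, TLS can be enabled for this output as follows. The host and token
+values are placeholders:
+
+```python
+[OUTPUT]
+    name          splunk
+    match         *
+    host          <HOST>
+    port          8088
+    splunk_token  <SPLUNK_HEC_TOKEN>
+    tls           on
+    # Set to off only when testing against self-signed certificates
+    tls.verify    on
+```
+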
## Getting Started @@ -125,7 +126,7 @@ This will create a payload that looks like: } ``` -For more information on the Splunk HEC payload format and all event meatadata Splunk accepts, see here: [http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC](http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC) +For more information on the Splunk HEC payload format and all event metadata Splunk accepts, see here: [http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC](http://docs.splunk.com/Documentation/Splunk/latest/Data/AboutHEC) ### Sending Raw Events @@ -168,9 +169,9 @@ The following configuration gathers CPU metrics, nests the appropriate field, ad name cpu tag cpu -# Move CPU metrics to be nested under "fields" and +# Move CPU metrics to be nested under "fields" and # add the prefix "metric_name:" to all metrics -# NOTE: you can change Wildcard field to only select metric fields +# NOTE: you can change Wildcard field to only select metric fields [FILTER] Name nest Match cpu @@ -183,18 +184,18 @@ The following configuration gathers CPU metrics, nests the appropriate field, ad [FILTER] Name modify Match cpu - Set index cpu-metrics + Set index cpu-metrics Set source fluent-bit Set sourcetype custom # ensure splunk_send_raw is on [OUTPUT] - name splunk + name splunk match * host <HOST> port 8088 splunk_send_raw on - splunk_token f9bd5bdb-c0b2-4a83-bcff-9625e5e908db + splunk_token f9bd5bdb-c0b2-4a83-bcff-9625e5e908db tls on tls.verify off ``` diff --git a/pipeline/outputs/stackdriver.md b/pipeline/outputs/stackdriver.md index 85071b8fc..5b66e9c34 100644 --- a/pipeline/outputs/stackdriver.md +++ b/pipeline/outputs/stackdriver.md @@ -32,10 +32,12 @@ Before to get started with the plugin configuration, make sure to obtain the pro | severity\_key | Specify the name of the key from the original record that contains the severity information. | `logging.googleapis.com/severity`. See [Stackdriver Special Fields][StackdriverSpecialFields] for more info. | | project_id_key | The value of this field is used by the Stackdriver output plugin to find the gcp project id from jsonPayload and then extract the value of it to set the PROJECT_ID within LogEntry logName, which controls the gcp project that should receive these logs. | `logging.googleapis.com/projectId`. See [Stackdriver Special Fields][StackdriverSpecialFields] for more info. | | autoformat\_stackdriver\_trace | Rewrite the _trace_ field to include the projectID and format it for use with Cloud Trace. When this flag is enabled, the user can get the correct result by printing only the traceID (usually 32 characters). | false | -| Workers | Enables dedicated thread(s) for this output. | 1 | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `1` | | custom\_k8s\_regex | Set a custom regex to extract field like pod\_name, namespace\_name, container\_name and docker\_id from the local\_resource\_id in logs. This is helpful if the value of pod or node name contains dots. | `(?<pod_name>[a-z0-9](?:[-a-z0-9]*[a-z0-9])?(?:\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace_name>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$` | | resource_labels | An optional list of comma separated strings specifying resource labels plaintext assignments (`new=value`) and/or mappings from an original field in the log entry to a destination field (`destination=$original`). 
Nested fields and environment variables are also supported using the [record accessor syntax](https://docs.fluentbit.io/manual/administration/configuring-fluent-bit/classic-mode/record-accessor). If configured, *all* resource labels will be assigned using this API only, with the exception of `project_id`. See [Resource Labels](#resource-labels) for more details. | |
 | compress | Set payload compression mechanism. The only available option is `gzip`. Default = "", which means no compression.| |
+| cloud\_logging\_base\_url | Set the base Cloud Logging API URL to use for the `/v2/entries:write` API request. | https://logging.googleapis.com |
+

 ### Configuration File

@@ -53,7 +55,7 @@ If you are using a _Google Cloud Credentials File_, the following configuration

 Example configuration file for k8s resource type:

-local_resource_id is used by stackdriver output plugin to set the labels field for different k8s resource types. Stackdriver plugin will try to find the local_resource_id field in the log entry. If there is no field logging.googleapis.com/local_resource_id in the log, the plugin will then construct it by using the tag value of the log.
+`local_resource_id` is used by the Stackdriver output plugin to set the labels field for different k8s resource types. The Stackdriver plugin will try to find the `local_resource_id` field in the log entry. If there is no field `logging.googleapis.com/local_resource_id` in the log, the plugin will then construct it by using the tag value of the log.

 The local_resource_id should be in format:

@@ -155,6 +157,15 @@ For instance, for a K8s resource type, `resource_labels` can be used in tandem w
 ```
 `resource_labels` also supports validation for required labels based on the input resource type. This allows fluent-bit to check if all specified labels are present for a given configuration before runtime. If validation is not currently supported for a resource type that you would like to use this API with, we encourage you to open a pull request for it. Adding validation for a new resource type is simple - all that is needed is to specify the resources associated with the type alongside the required labels [here](https://github.com/fluent/fluent-bit/blob/master/plugins/out_stackdriver/stackdriver_resource_types.c#L27).
+
+## Log Name
+
+By default, the plugin will write to the following log name:
+```text
+/projects/<project ID>/logs/<log tag>
+```
+You may be in a scenario where being more specific about the log name is important (for example, [integration with Log Router rules](https://cloud.google.com/logging/docs/routing/overview) or [controlling the cardinality of log-based metrics](https://cloud.google.com/logging/docs/logs-based-metrics/troubleshooting#too-many-time-series)). You can control the log name directly on a per-log basis by using the [`logging.googleapis.com/logName` special field][StackdriverSpecialFields]. You can configure a `log_name_key` if you'd like to use something other than `logging.googleapis.com/logName`. For example, if `log_name_key` is set to `mylognamefield`, the plugin will extract the log name from `mylognamefield` in the log.
+
 ## Troubleshooting Notes

 ### Upstream connection error

@@ -189,7 +200,7 @@ Do following check:

 > Github reference: [#7552](https://github.com/fluent/fluent-bit/issues/7552)

-When the number of Workers is greater than 1, Fluent Bit may interimittently crash.
+When the number of Workers is greater than 1, Fluent Bit may intermittently crash.
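+
+Until that issue is resolved, a minimal workaround sketch (other parameters for
+your deployment omitted) is to keep this output on a single worker:
+
+```python
+[OUTPUT]
+    name     stackdriver
+    match    *
+    workers  1
+```
+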
## Other implementations diff --git a/pipeline/outputs/stackdriver_special_fields.md b/pipeline/outputs/stackdriver_special_fields.md index eff5d0ee4..3bb2b9cd3 100644 --- a/pipeline/outputs/stackdriver_special_fields.md +++ b/pipeline/outputs/stackdriver_special_fields.md @@ -5,96 +5,66 @@ When the [google-logging-agent](https://cloud.google.com/logging/docs/agent) rec ## Log Entry Fields Currently, we support some special fields in fluent-bit for setting fields on the LogEntry object: -| JSON log field | [LogEntry](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry) field | Description | -| :--- | :--- | :--- | -| logging.googleapis.com/operation | operation | Additional information about a potentially long-running operation | -| logging.googleapis.com/labels | labels | The value of this field should be a structured record | -| logging.googleapis.com/insertId | insertId | A unique identifier for the log entry. It is used to order logEntries | -| logging.googleapis.com/sourceLocation | sourceLocation | Additional information about the source code location that produced the log entry. | -| logging.googleapis.com/http_request | httpRequest | A common proto for logging HTTP requests. | -| logging.googleapis.com/trace | trace | Resource name of the trace associated with the log entry | -| logging.googleapis.com/traceSampled | traceSampled | The sampling decision associated with this log entry. | -| logging.googleapis.com/spanId | spanId | The ID of the trace span associated with this log entry. | -| timestamp | timestamp | An object including the seconds and nanos fields that represents the time | -| timestampSecond & timestampNanos | timestamp | The seconds and nanos that represents the time | +| JSON log field | [LogEntry](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry) field | type | Description | +| :--- | :--- | :--- | :--- | +| `logging.googleapis.com/logName` | `logName` | `string` | The log name to write this log to. | +| `logging.googleapis.com/labels` | `labels` | `object<string, string>` | The labels for this log. | +| `logging.googleapis.com/severity` | `severity` | [`LogSeverity` enum](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#LogSeverity) | The severity of this log. | +| `logging.googleapis.com/monitored_resource` | `resource` | [`MonitoredResource`](https://cloud.google.com/logging/docs/reference/v2/rest/v2/MonitoredResource) (without `type`) | Resource labels for this log. See [caveats](#monitored-resource). | +| `logging.googleapis.com/operation` | `operation` | [`LogEntryOperation`](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#LogEntryOperation) | Additional information about a potentially long-running operation. | +| `logging.googleapis.com/insertId` | `insertId` | `string` | A unique identifier for the log entry. It is used to order logEntries. | +| `logging.googleapis.com/sourceLocation` | `sourceLocation` | [`LogEntrySourceLocation`](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#LogEntrySourceLocation) | Additional information about the source code location that produced the log entry. | +| `logging.googleapis.com/http_request` | `httpRequest` | [`HttpRequest`](https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#HttpRequest) | A common proto for logging HTTP requests. | +| `logging.googleapis.com/trace` | `trace` | `string` | Resource name of the trace associated with the log entry. 
| +| `logging.googleapis.com/traceSampled` | `traceSampled` | boolean | The sampling decision associated with this log entry. | +| `logging.googleapis.com/spanId` | `spanId` | `string` | The ID of the trace span associated with this log entry. | +| `timestamp` | `timestamp` | `object` ([protobuf `Timestamp` object format](https://protobuf.dev/reference/protobuf/google.protobuf/#timestamp)) | An object including the seconds and nanos fields that represents the time. | +| `timestampSecond` & `timestampNanos` | `timestamp` | `int` | The seconds and nanos that represents the time. | ## Other Special Fields | JSON log field | Description | | :--- | :--- | -| logging.googleapis.com/projectId | Changes the project ID that this log will be written to. Ensure that you are authenticated to write logs to this project. | +| `logging.googleapis.com/projectId` | Changes the project ID that this log will be written to. Ensure that you are authenticated to write logs to this project. | +| `logging.googleapis.com/local_resource_id` | Overrides the [configured `local_resource_id`](./stackdriver.md#resource-labels). | -## Operation -Operation field contains additional information about a potentially long-running operation with which a log entry is associated. +## Using Special Fields -The JSON representation is as followed: -```text +To use a special field, you must add a field with the right name and value to your log. Given an example structured log (internally in MessagePack but shown in JSON for demonstration): +```json { - "id": string, - "producer": string, - "first": boolean, - "last": boolean + "log": "Hello world!" } ``` - -For example, when the jsonPayload contains the subfield `logging.googleapis.com/operation`: -```text -jsonPayload { - "logging.googleapis.com/operation": { - "id": "test_id", - "producer": "test_producer", - "first": true, - "last": true - } - ... +To use the `logging.googleapis.com/logName` special field, you would add it to your structured log as follows: +```json +{ + "log": "Hello world!", + "logging.googleapis.com/logName": "my_log" } ``` -the stackdriver output plugin will extract the operation field and remove it from jsonPayload. LogEntry will be: -```text +For the special fields that map to `LogEntry` protos, you will need to add them as objects with field names that match the proto. For example, to use the `logging.googleapis.com/operation`: +```json { - "jsonPayload": { - ... - } - "operation": { + "log": "Hello world!", + "logging.googleapis.com/operation": { "id": "test_id", "producer": "test_producer", "first": true, "last": true } - ... } ``` +Adding special fields to logs is best done through the [`modify` filter](https://docs.fluentbit.io/manual/pipeline/filters/modify) for simple fields, or [a Lua script using the `lua` filter](https://docs.fluentbit.io/manual/pipeline/filters/lua) for more complex fields. -### Use Cases -**1. If the subfields are empty or in incorrect type, stackdriver output plugin will set these subfields empty.** For example: -```text -jsonPayload { - "logging.googleapis.com/operation": { - "id": 123, #incorrect type - # no producer here - "first": true, - "last": true - } - ... -} -``` -the logEntry will be: +## Simple Type Special Fields + +For special fields with simple types (with the exception of the [`logging.googleapis.com/insertId` field](#insert-id)), they will follow this pattern (demonstrated with the `logging.googleapis.com/logName` field): + +1. 
If the special field matches the type, it will be moved to the corresponding LogEntry field. For example: ```text { - "jsonPayload": { - ... - } - "operation": { - "first": true, - "last": true - } - ... -} -``` -**2. If the `logging.googleapis.com/operation` itself is not a map, stackdriver output plugin will leave this field untouched.** For example: -```text -jsonPayload { - "logging.googleapis.com/operation": "some string", + "logging.googleapis.com/logName": "my_log" ... } ``` @@ -102,25 +72,17 @@ the logEntry will be: ```text { "jsonPayload": { - "logging.googleapis.com/operation": "some string", ... } + "logName": "my_log" ... } ``` -**3. If there are extra subfields, stackdriver output plugin will add operation field to logEntry and preserve the extra subfields in jsonPayload.** For example: -```text -jsonPayload { - "logging.googleapis.com/operation": { - "id": "test_id", - "producer": "test_producer", - "first": true, - "last": true, - "extra1": "some string", - "extra2": 123, - "extra3": true - } +2. If the field is non-empty but an invalid, it will be left in the jsonPayload. For example: +```text +{ + "logging.googleapis.com/logName": 12345 ... } ``` @@ -128,64 +90,57 @@ the logEntry will be: ```text { "jsonPayload": { - "logging.googleapis.com/operation": { - "extra1": "some string", - "extra2": 123, - "extra3": true - } + "logging.googleapis.com/logName": 12345 ... } - "operation": { - "id": "test_id", - "producer": "test_producer", - "first": true, - "last": true - } - ... } ``` -## Labels -labels field contains specific labels in a structured entry that will be added to LogEntry labels. -For example, when the jsonPayload contains the subfield `logging.googleapis.com/labels`: +### Exceptions + +#### Insert ID + +If the `logging.googleapis.com/insertId` field has an invalid type, the log will be rejected by the plugin and not sent to Cloud Logging. + +#### Trace Sampled + +If the [`autoformat_stackdriver_trace` plugin configuration option]() is set to `true`, the value provided in the `trace` field will be formatted into the format that Cloud Logging expects along with the detected Project ID (from the Google Metadata server, configured in the plugin, or provided via special field). + +For example, if `autoformat_stackdriver_trace` is enabled, this: ```text -jsonPayload { - "logging.googleapis.com/labels": { - "A": "valA", - "B": "valB", - "C": "valC" - } - ... +{ + "logging.googleapis.com/projectId": "my-project-id", + "logging.googleapis.com/trace": "12345" } ``` -the stackdriver output plugin will extract labels from the subfield `logging.googleapis.com/labels` and move it up from jsonPayload to LogEntry Labels. LogEntry will be: +Will become this: ```text { "jsonPayload": { ... - } - "labels": { - "A": "valA", - "B": "valB", - "C": "valC" - } - ... + }, + "projectId": "my-project-id", + "trace": "/projects/my-project-id/traces/12345" } ``` -## insertId -InsertId is a unique identifier for the log entry. It is used to order logEntries. +#### `timestampSecond` and `timestampNano` -The JSON representation is as followed: -```text -"insertId": string -``` +The `timestampSecond` and `timestampNano` fields don't map directly to the `timestamp` field in `LogEntry` so the parsing behaviour deviates from other special fields. Read more in the [Timestamp section](#timestamp). 
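+
+As mentioned earlier, simple special fields can be attached to records with the [`modify` filter](https://docs.fluentbit.io/manual/pipeline/filters/modify). For example, a minimal sketch that sets the `logging.googleapis.com/logName` special field on every record (the `my_log` value is only an illustration):
+
+```text
+[FILTER]
+    Name   modify
+    Match  *
+    # Add the special field only if it is not already present in the record
+    Add    logging.googleapis.com/logName my_log
+```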
+ +## Proto Special Fields + +For special fields that expect the format of a proto type from the `LogEntry` (with the exception of the `logging.googleapis.com/monitored_resource` field) will follow this pattern (demonstrated with the `logging.googleapis.com/operation` field): -### Use Cases -**1. If the insertId is a non-empty string.** For example: +If any subfields of the proto are empty or in incorrect type, the plugin will set these subfields empty. For example: ```text -jsonPayload { - "logging.googleapis.com/insertId": "test_id" +{ + "logging.googleapis.com/operation": { + "id": 123, #incorrect type + # no producer here + "first": true, + "last": true + } ... } ``` @@ -195,41 +150,18 @@ the logEntry will be: "jsonPayload": { ... } - "insertId": "test_id" - ... -} -``` - -**2. If the insertId is invalid (not non-empty string).** For example: -```text -jsonPayload { - "logging.googleapis.com/insertId": 12345 + "operation": { + "first": true, + "last": true + } ... } ``` -The logging agent will log an error and reject the entry. - -## SourceLocation -SourceLocation field contains additional information about the source code location that produced the log entry. The format. -The JSON representation is as followed: +If the field itself is not a map, the plugin will leave this field untouched. For example: ```text { - "file": string, - "line": string, - "function": string -} -``` - -### Use Cases -Set the input log as followed: -```text -jsonPayload { - "logging.googleapis.com/sourceLocation": { - "file": "my_file", - "line": 123, - "function": "foo()" - } + "logging.googleapis.com/operation": "some string", ... } ``` @@ -237,62 +169,25 @@ the logEntry will be: ```text { "jsonPayload": { + "logging.googleapis.com/operation": "some string", ... } - "sourceLocation": { - "file": "my_file", - "line": 123, - "function": "foo()" - } ... } ``` -## httpRequest -HttpRequest field is a common proto for logging HTTP requests. - -The JSON representation is as followed: +If there are extra subfields, the plugin will add the recognized fields to the corresponding field in the LogEntry, and preserve the extra subfields in jsonPayload. For example: ```text { - "requestMethod": string, - "requestUrl": string, - "requestSize": string, - "status": integer, - "responseSize": string, - "userAgent": string, - "remoteIp": string, - "serverIp": string, - "referer": string, - "latency": string, - "cacheLookup": boolean, - "cacheHit": boolean, - "cacheValidatedWithOriginServer": boolean, - "cacheFillBytes": string, - "protocol": string -} - -``` + "logging.googleapis.com/operation": { + "id": "test_id", + "producer": "test_producer", + "first": true, + "last": true, -### Use Cases -Set the input log as followed: -```text -jsonPayload { - "logging.googleapis.com/http_request": { - "requestMethod":"GET", - "requestUrl":"logging.googleapis.com", - "requestSize":"12", - "status":200, - "responseSize":"12", - "userAgent":"Mozilla", - "remoteIp":"255.0.0.1", - "serverIp":"255.0.0.1", - "referer":"referer", - "latency":"1s", - "cacheLookup":true, - "cacheHit":true, - "cacheValidatedWithOriginServer":true, - "cacheFillBytes":"12", - "protocol":"HTTP/1.2" + "extra1": "some string", + "extra2": 123, + "extra3": true } ... } @@ -301,64 +196,41 @@ the logEntry will be: ```text { "jsonPayload": { + "logging.googleapis.com/operation": { + "extra1": "some string", + "extra2": 123, + "extra3": true + } ... 
} - "httpRequest": { - "requestMethod":"GET", - "requestUrl":"logging.googleapis.com", - "requestSize":"12", - "status":200, - "responseSize":"12", - "userAgent":"Mozilla", - "remoteIp":"255.0.0.1", - "serverIp":"255.0.0.1", - "referer":"referer", - "latency":"1s", - "cacheLookup":true, - "cacheHit":true, - "cacheValidatedWithOriginServer":true, - "cacheFillBytes":"12", - "protocol":"HTTP/1.2" + "operation": { + "id": "test_id", + "producer": "test_producer", + "first": true, + "last": true } ... } ``` -## Trace +### Exceptions -TraceId is resource name of the trace associated with the log entry. -If enable autoformat_stackdriver_trace flag in config the entry will automatically get the projectID from the Google Metadata server and add it. +#### Monitored Resource ID -The JSON representation is as followed: -```text -"trace": string -``` +The `logging.googleapis.com/monitored_resource` field is parsed in a special way, meaning it has some important exceptions: -### Use Cases -Set the input log as followed: -```text -jsonPayload { - "logging.googleapis.com/trace": "0123456789abcdef0123456789abcdef" - ... -} -``` -the logEntry will be: -```text -{ - "jsonPayload": { - ... - } - "trace": "projects/your-project-name/traces/0123456789abcdef0123456789abcdef" - ... -} -``` +The `type` field from the [`MonitoredResource` proto]() is not parsed out of the special field. It is read from the [`resource` plugin configuration option](https://docs.fluentbit.io/manual/pipeline/outputs/stackdriver#configuration-parameters). If it is supplied in the `logging.googleapis.com/monitored_resource` special field, it will not be recognized. + +The `labels` field is expected to be an `object<string, string>`. If any fields have a value that is not a string, the value is ignored and not preserved. The plugin logs an error and drops the field. + +If no valid `labels` field is found, or if all of entries in the `labels` object provided are invalid, the `logging.googleapis.com/monitored_resource` field is dropped in favour of automatically setting resource labels using other available information based on the configured `resource` type. ## Timestamp We support two formats of time-related fields: Format 1 - timestamp: -JsonPayload contains a timestamp field that includes the seconds and nanos fields. +Log body contains a `timestamp` field that includes the seconds and nanos fields. ```text { "timestamp": { @@ -368,7 +240,7 @@ JsonPayload contains a timestamp field that includes the seconds and nanos field } ``` Format 2 - timestampSeconds/timestampNanos: -JsonPayload contains both the timestampSeconds and timestampNanos fields. +Log body contains both the `timestampSeconds` and `timestampNanos` fields. ```text { "timestampSeconds": CURRENT_SECONDS, @@ -377,16 +249,15 @@ JsonPayload contains both the timestampSeconds and timestampNanos fields. ``` -If one of the following JSON timestamp representations is present in a structured record, the Logging agent collapses them into a single representation in the timestamp field in the LogEntry object. +If one of the following JSON timestamp representations is present in a structured record, the plugin collapses them into a single representation in the timestamp field in the `LogEntry` object. -Without time-related fields, the logging agent will set the current time as timestamp. Supporting time-related fields enables users to get more information about the logEntry. +Without time-related fields, the plugin will set the current time as timestamp. 
+### Format 1 -### Use Cases -**Format 1** Set the input log as followed: ```text -jsonPayload { +{ "timestamp": { "seconds": 1596149787, "nanos": 12345 @@ -405,10 +276,11 @@ the logEntry will be: } ``` -**Format 2** +### Format 2 + Set the input log as followed: ```text -jsonPayload { +{ "timestampSeconds":1596149787, "timestampNanos": 12345 ... @@ -424,3 +296,5 @@ the logEntry will be: ... } ``` + +If the `timestamp` object or the `timestampSeconds` and `timestampNanos` fields end up being invalid, they will remain in the `jsonPayload` untouched. \ No newline at end of file diff --git a/pipeline/outputs/standard-output.md b/pipeline/outputs/standard-output.md index 98b663945..69e3e44f2 100644 --- a/pipeline/outputs/standard-output.md +++ b/pipeline/outputs/standard-output.md @@ -6,10 +6,10 @@ The **stdout** output plugin allows to print to the standard output the data rec | Key | Description | default | | :--- | :--- | :--- | -| Format | Specify the data format to be printed. Supported formats are _msgpack_ _json_, _json\_lines_ and _json\_stream_. | msgpack | +| Format | Specify the data format to be printed. Supported formats are _msgpack_, _json_, _json\_lines_ and _json\_stream_. | msgpack | | json\_date\_key | Specify the name of the time key in the output record. To disable the time key just set the value to `false`. | date | | json\_date\_format | Specify the format of the date. Supported formats are _double_, _epoch_, _iso8601_ (eg: _2018-05-30T09:39:52.000681Z_) and _java_sql_timestamp_ (eg: _2018-05-30 09:39:52.000681_) | double | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 1 | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `1` | ### Command Line @@ -35,4 +35,3 @@ Fluent Bit v1.x.x ``` No more, no less, it just works. - diff --git a/pipeline/outputs/syslog.md b/pipeline/outputs/syslog.md index 6b4403e9a..9cce9d2e5 100644 --- a/pipeline/outputs/syslog.md +++ b/pipeline/outputs/syslog.md @@ -2,7 +2,8 @@ The Syslog output plugin allows you to deliver messages to Syslog servers. It supports RFC3164 and RFC5424 formats through different transports such as UDP, TCP or TLS. -As of Fluent Bit v1.5.3 the configuration is very strict. You must be aware of the structure of your original record so you can configure the plugin to use specific keys to compose your outgoing Syslog message. +As of Fluent Bit v1.5.3 the configuration is very strict. +You must be aware of the structure of your original record so you can configure the plugin to use specific keys to compose your outgoing Syslog message. > Future versions of Fluent Bit are expanding this plugin feature set to support better handling of keys and message composing. @@ -12,7 +13,7 @@ As of Fluent Bit v1.5.3 the configuration is very strict. You must be aware of t | :--- | :--- | :--- | | host | Domain or IP address of the remote Syslog server. | 127.0.0.1 | | port | TCP or UDP port of the remote Syslog server. | 514 | -| mode | Desired transport type. Available options are `tcp`, `tls` and `udp`. | udp | +| mode | Desired transport type. Available options are `tcp` and `udp`. | udp | | syslog\_format | The Syslog protocol format to use. Available options are `rfc3164` and `rfc5424`. | rfc5424 | | syslog\_maxsize | The maximum size allowed per message. The value must be an integer representing the number of bytes allowed. 
If no value is provided, the default size is set depending of the protocol version specified by `syslog_format`.<br><br>`rfc3164` sets max size to 1024 bytes.<br><br>`rfc5424` sets the size to 2048 bytes. | | | syslog\_severity\_key | The key name from the original record that contains the Syslog severity number. This configuration is optional. | | @@ -27,9 +28,15 @@ As of Fluent Bit v1.5.3 the configuration is very strict. You must be aware of t | syslog\_procid\_preset | The preset process ID. It will be overwritten if `syslog_procid_key` is set and a key of a record is matched. This configuration is optional. | | | syslog\_msgid\_key | The key name from the original record that contains the Message ID associated to the message. This configuration is optional. | | | syslog\_msgid\_preset | The preset message ID. It will be overwritten if `syslog_msgid_key` is set and a key of a record is matched. This configuration is optional. | | -| syslog\_sd\_key | The key name from the original record that contains the Structured Data \(SD\) content. This configuration is optional. | | +| syslog\_sd\_key | The key name from the original record that contains a map of key/value pairs to use as Structured Data \(SD\) content. The key name is included in the resulting SD field as shown in examples below. This configuration is optional. | | | syslog\_message\_key | The key name from the original record that contains the message to deliver. Note that this property is **mandatory**, otherwise the message will be empty. | | | allow\_longer\_sd\_id| If true, Fluent-bit allows SD-ID that is longer than 32 characters. Such long SD-ID violates RFC 5424.| false | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | + +### TLS / SSL + +The Syslog output plugin supports TLS/SSL. +For more details about the properties available and general configuration, see [TLS/SSL](../../administration/transport-security.md). ## Examples @@ -37,6 +44,8 @@ As of Fluent Bit v1.5.3 the configuration is very strict. 
You must be aware of t Get started quickly with this configuration file: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```text [OUTPUT] name syslog @@ -55,6 +64,28 @@ Get started quickly with this configuration file: syslog_sd_key sd syslog_message_key message ``` +{% endtab %} +{% tab title="fluent-bit.yaml" %} +```yaml + outputs: + - name: syslog + match: "*" + host: syslog.yourserver.com + port: 514 + mode: udp + syslog_format: rfc5424 + syslog_maxsize: 2048 + syslog_severity_key: severity + syslog_facility_key: facility + syslog_hostname_key: hostname + syslog_appname_key: appname + syslog_procid_key: procid + syslog_msgid_key: msgid + syslog_sd_key: sd + syslog_message_key: message +``` +{% endtab %} +{% endtabs %} ### Structured Data @@ -79,6 +110,8 @@ Example log: Example configuration file: +{% tabs %} +{% tab title="fluent-bit.conf" %} ```text [OUTPUT] name syslog @@ -91,13 +124,94 @@ Example configuration file: syslog_hostname_key hostname syslog_appname_key appname syslog_procid_key procid - syslog_msgid_key msgid + syslog_msgid_key msgid syslog_sd_key uls@0 syslog_message_key log ``` +{% endtab %} +{% tab title="fluent-bit.yaml" %} +```yaml + outputs: + - name: syslog + match: "*" + host: syslog.yourserver.com + port: 514 + mode: udp + syslog_format: rfc5424 + syslog_maxsize: 2048 + syslog_hostname_key: hostname + syslog_appname_key: appname + syslog_procid_key: procid + syslog_msgid_key: msgid + syslog_sd_key: uls@0 + syslog_message_key: log +``` +{% endtab %} +{% endtabs %} Example output: ```bash <14>1 2021-07-12T14:37:35.569848Z myhost myapp 1234 ID98 [uls@0 logtype="access" clustername="mycluster" namespace="mynamespace"] Sample app log message. -``` \ No newline at end of file +``` + +### Adding Structured Data Authentication Token + +Some services use the structured data field to pass authentication tokens (e.g. `[<token>@41018]`), which would need to be added to each log message dynamically. +However, this requires setting the token as a key rather than as a value. 
+Here's an example of how that might be achieved, using `AUTH_TOKEN` as a [variable](../../administration/configuring-fluent-bit/classic-mode/variables.md): + +{% tabs %} +{% tab title="fluent-bit.conf" %} +```text +[FILTER] + name lua + match * + call append_token + code function append_token(tag, timestamp, record) record["${AUTH_TOKEN}"] = {} return 2, timestamp, record end + +[OUTPUT] + name syslog + match * + host syslog.yourserver.com + port 514 + mode tcp + syslog_format rfc5424 + syslog_hostname_preset my-hostname + syslog_appname_preset my-appname + syslog_message_key log + allow_longer_sd_id true + syslog_sd_key ${AUTH_TOKEN} + tls on + tls.crt_file /path/to/my.crt +``` +{% endtab %} +{% tab title="fluent-bit.yaml" %} +```yaml + filters: + - name: lua + match: "*" + call: append_token + code: | + function append_token(tag, timestamp, record) + record["${AUTH_TOKEN}"] = {} + return 2, timestamp, record + end + + outputs: + - name: syslog + match: "*" + host: syslog.yourserver.com + port: 514 + mode: tcp + syslog_format: rfc5424 + syslog_hostname_preset: myhost + syslog_appname_preset: myapp + syslog_message_key: log + allow_longer_sd_id: true + syslog_sd_key: ${AUTH_TOKEN} + tls: on + tls.crt_file: /path/to/my.crt +``` +{% endtab %} +{% endtabs %} diff --git a/pipeline/outputs/tcp-and-tls.md b/pipeline/outputs/tcp-and-tls.md index 545063593..55de1b07c 100644 --- a/pipeline/outputs/tcp-and-tls.md +++ b/pipeline/outputs/tcp-and-tls.md @@ -11,7 +11,7 @@ The **tcp** output plugin allows to send records to a remote TCP server. The pay | Format | Specify the data format to be printed. Supported formats are _msgpack_ _json_, _json\_lines_ and _json\_stream_. | msgpack | | json\_date\_key | Specify the name of the time key in the output record. To disable the time key just set the value to `false`. | date | | json\_date\_format | Specify the format of the date. Supported formats are _double_, _epoch_, _iso8601_ (eg: _2018-05-30T09:39:52.000681Z_) and _java_sql_timestamp_ (eg: _2018-05-30 09:39:52.000681_) | double | -| Workers | Enables dedicated thread(s) for this output. Default value is set since version 1.8.13. For previous versions is 0. | 2 | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `2` | ## TLS Configuration Parameters diff --git a/pipeline/outputs/treasure-data.md b/pipeline/outputs/treasure-data.md index ff2a070bf..22991f239 100644 --- a/pipeline/outputs/treasure-data.md +++ b/pipeline/outputs/treasure-data.md @@ -12,6 +12,7 @@ The plugin supports the following configuration parameters: | Database | Specify the name of your target database. | | | Table | Specify the name of your target table where the records will be stored. | | | Region | Set the service region, available values: US and JP | US | +| Workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. 
| `0` | ## Getting Started @@ -41,4 +42,3 @@ In your main configuration file append the following _Input_ & _Output_ sections Database fluentbit Table cpu_samples ``` - diff --git a/pipeline/outputs/vivo-exporter.md b/pipeline/outputs/vivo-exporter.md index 69c00dfcb..156ae257a 100644 --- a/pipeline/outputs/vivo-exporter.md +++ b/pipeline/outputs/vivo-exporter.md @@ -9,6 +9,8 @@ Vivo Exporter is an output plugin that exposes logs, metrics, and traces through | `empty_stream_on_read` | If enabled, when an HTTP client consumes the data from a stream, the stream content will be removed. | Off | | `stream_queue_size` | Specify the maximum queue size per stream. Each specific stream for logs, metrics and traces can hold up to `stream_queue_size` bytes. | 20M | | `http_cors_allow_origin` | Specify the value for the HTTP Access-Control-Allow-Origin header (CORS). | | +| `workers` | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `1` | + ### Getting Started @@ -25,7 +27,7 @@ Here is a simple configuration of Vivo Exporter, note that this example is not b match * empty_stream_on_read off stream_queue_size 20M - http_cors_allow_origin * + http_cors_allow_origin * ``` ### How it works diff --git a/pipeline/outputs/websocket.md b/pipeline/outputs/websocket.md index 8aa28916c..a5a049df1 100644 --- a/pipeline/outputs/websocket.md +++ b/pipeline/outputs/websocket.md @@ -6,12 +6,14 @@ The **websocket** output plugin allows to flush your records into a WebSocket en | Key | Description | default | | :--- | :--- | :--- | -| Host | IP address or hostname of the target WebScoket Server | 127.0.0.1 | -| Port | TCP port of the target WebScoket Server | 80 | +| Host | IP address or hostname of the target WebSocket Server | 127.0.0.1 | +| Port | TCP port of the target WebSocket Server | 80 | | URI | Specify an optional HTTP URI for the target websocket server, e.g: /something | / | +| Header | Add a HTTP header key/value pair. Multiple headers can be set. | | | Format | Specify the data format to be used in the HTTP request body, by default it uses _msgpack_. Other supported formats are _json_, _json\_stream_ and _json\_lines_ and _gelf_. | msgpack | | json\_date\_key | Specify the name of the date field in output | date | | json\_date\_format | Specify the format of the date. Supported formats are _double_, _epoch_, _iso8601_ (eg: _2018-05-30T09:39:52.000681Z_) and _java_sql_timestamp_ (eg: _2018-05-30 09:39:52.000681_) | double | +| workers | The number of [workers](../../administration/multithreading.md#outputs) to perform flush operations for this output. | `0` | ## Getting Started @@ -62,6 +64,7 @@ Websocket plugin is working with tcp keepalive mode, please refer to [networking Listen 0.0.0.0 Port 5170 Format json + [OUTPUT] Name websocket Match * diff --git a/pipeline/parsers/configuring-parser.md b/pipeline/parsers/configuring-parser.md index c9472a11d..903ebe2b4 100644 --- a/pipeline/parsers/configuring-parser.md +++ b/pipeline/parsers/configuring-parser.md @@ -29,12 +29,14 @@ Multiple parsers can be defined and each section has it own properties. The foll | Format | Specify the format of the parser, the available options here are: [json](json.md), [regex](regular-expression.md), [ltsv](ltsv.md) or [logfmt](logfmt.md). | | Regex | If format is _regex_, this option _must_ be set specifying the Ruby Regular Expression that will be used to parse and compose the structured message. 
| | Time\_Key | If the log entry provides a field with a timestamp, this option specifies the name of that field. | -| Time\_Format | Specify the format of the time field so it can be recognized and analyzed properly. Fluent-bit uses `strptime(3)` to parse time so you can refer to [strptime documentation](https://linux.die.net/man/3/strptime) for available modifiers. | +| Time\_Format | Specify the format of the time field so it can be recognized and analyzed properly. Fluent Bit uses `strptime(3)` to parse time. See the [strptime documentation](https://linux.die.net/man/3/strptime) for available modifiers. The `%L` field descriptor is supported for fractional seconds. | | Time\_Offset | Specify a fixed UTC time offset \(e.g. -0600, +0200, etc.\) for local dates. | -| Time\_Keep | By default when a time key is recognized and parsed, the parser will drop the original time field. Enabling this option will make the parser to keep the original time field and it value in the log entry. | +| Time\_Keep | By default when a time key is recognized and parsed, the parser will drop the original time field. Enabling this option will make the parser to keep the original time field and its value in the log entry. | +| Time\_System\_Timezone | If there is no timezone (`%z`) specified in the given `Time_Format`, enabling this option will make the parser detect and use the system's configured timezone. The configured timezone is detected from the [`TZ` environment variable](https://www.gnu.org/software/libc/manual/html_node/TZ-Variable.html). | | Types | Specify the data type of parsed field. The syntax is `types <field_name_1>:<type_name_1> <field_name_2>:<type_name_2> ...`. The supported types are `string`\(default\), `integer`, `bool`, `float`, `hex`. The option is supported by `ltsv`, `logfmt` and `regex`. | | Decode\_Field | Decode a field value, the only decoder available is `json`. The syntax is: `Decode_Field json <field_name>`. | | Skip\_Empty\_Values | Specify a boolean which determines if the parser should skip empty values. The default is `true`. | +| Time_Strict | The default value (`true`) tells the parser to be strict with the expected time format. With this option set to false, the parser will be permissive with the format of the time. This is useful when the format expects time fraction but the time to be parsed doesn't include it. | ## Parsers Configuration File diff --git a/pipeline/parsers/decoders.md b/pipeline/parsers/decoders.md index 38e9244f6..4fb4016f9 100644 --- a/pipeline/parsers/decoders.md +++ b/pipeline/parsers/decoders.md @@ -1,29 +1,35 @@ # Decoders -There are certain cases where the log messages being parsed contains encoded data, a typical use case can be found in containerized environments with Docker: application logs it data in JSON format but becomes an escaped string, Consider the following example +There are cases where the log messages being parsed contain encoded data. A typical +use case can be found in containerized environments with Docker. Docker logs its +data in JSON format, which uses escaped strings. -Original message generated by the application: +Consider the following message generated by the application: ```text {"status": "up and running"} ``` -Then the Docker log message become encapsulated as follows: +The Docker log message encapsulates something like this: ```text {"log":"{\"status\": \"up and running\"}\r\n","stream":"stdout","time":"2018-03-09T01:01:44.851160855Z"} ``` -as you can see the original message is handled as an escaped string. 
Ideally in Fluent Bit we would like to keep having the original structured message and not a string. +The original message is handled as an escaped string. Fluent Bit wants to use the +original structured message and not a string. ## Getting Started -Decoders are a built-in feature available through the Parsers file, each Parser definition can optionally set one or multiple decoders. There are two type of decoders type: +Decoders are a built-in feature available through the Parsers file. Each parser +definition can optionally set one or more decoders. There are two types of decoders: -* Decode\_Field: if the content can be decoded in a structured message, append that structure message \(keys and values\) to the original log message. -* Decode\_Field\_As: any content decoded \(unstructured or structured\) will be replaced in the same key/value, no extra keys are added. +- `Decode_Field`: If the content can be decoded in a structured message, append + the structured message (keys and values) to the original log message. +- `Decode_Field_As`: Any decoded content (unstructured or structured) will be + replaced in the same key/value, and no extra keys are added. -Our pre-defined Docker Parser have the following definition: +Our pre-defined Docker parser has the following definition: ```text [PARSER] @@ -37,35 +43,40 @@ Our pre-defined Docker Parser have the following definition: Decode_Field_As escaped log ``` -Each line in the parser with a key _Decode\_Field_ instruct the parser to apply a specific decoder on a given field, optionally it offer the option to take an extra action if the decoder cannot succeed. +Each line in the parser with a key `Decode_Field` instructs the parser to apply +a specific decoder on a given field. Optionally, it offers the option to take an +extra action if the decoder doesn't succeed. -### Decoders +### Decoder options -| Name | Description | -| :--- | :--- | -| json | handle the field content as a JSON map. If it find a JSON map it will replace the content with a structured map. | -| escaped | decode an escaped string. | -| escaped\_utf8 | decode a UTF8 escaped string. | +| Name | Description | +| -------------- | ----------- | +| `json` | Handle the field content as a JSON map. If it finds a JSON map, it replaces the content with a structured map. | +| `escaped` | Decode an escaped string. | +| `escaped_utf8` | Decode a UTF8 escaped string. | ### Optional Actions -By default if a decoder fails to decode the field or want to try a next decoder, is possible to define an optional action. Available actions are: +If a decoder fails to decode the field or, you want to try another decoder, you can +define an optional action. Available actions are: | Name | Description | -| :--- | :--- | -| try\_next | if the decoder failed, apply the next Decoder in the list for the same field. | -| do\_next | if the decoder succeeded or failed, apply the next Decoder in the list for the same field. | +| -----| ----------- | +| `try_next` | if the decoder failed, apply the next decoder in the list for the same field. | +| `do_next` | if the decoder succeeded or failed, apply the next decoder in the list for the same field. | -Note that actions are affected by some restrictions: +Actions are affected by some restrictions: -* on Decode\_Field\_As, if succeeded, another decoder of the same type in the same field can be applied only if the data continues being an unstructured message \(raw text\). -* on Decode\_Field, if succeeded, can only be applied once for the same field. 
By nature Decode\_Field aims to decode a structured message. +- `Decode_Field_As`: If successful, another decoder of the same type and the same + field can be applied only if the data continues being an unstructured message (raw text). +- `Decode_Field`: If successful, can only be applied once for the same field. + `Decode`_Field` is intended to decode a structured message. ### Examples -### escaped\_utf8 +#### `escaped_utf8` -Example input \(from `/path/to/log.log` in configuration below\) +Example input from `/path/to/log.log`: ```text {"log":"\u0009Checking indexes...\n","stream":"stdout","time":"2018-02-19T23:25:29.1845444Z"} @@ -73,18 +84,18 @@ Example input \(from `/path/to/log.log` in configuration below\) {"log":"\u0009Done\n","stream":"stdout","time":"2018-02-19T23:25:29.1845622Z"} ``` -Example output +Example output: ```text -[24] tail.0: [1519082729.184544400, {"log"=>" Checking indexes... +[24] tail.0: [1519082729.184544400, {"log"=>" Checking indexes... ", "stream"=>"stdout", "time"=>"2018-02-19T23:25:29.1845444Z"}] [25] tail.0: [1519082729.184553600, {"log"=>" Validated: _audit _internal _introspection _telemetry _thefishbucket history main snmp_data summary ", "stream"=>"stdout", "time"=>"2018-02-19T23:25:29.1845536Z"}] -[26] tail.0: [1519082729.184562200, {"log"=>" Done +[26] tail.0: [1519082729.184562200, {"log"=>" Done ", "stream"=>"stdout", "time"=>"2018-02-19T23:25:29.1845622Z"}] ``` -Configuration file +Decoder configuration file: ```text [SERVICE] @@ -100,7 +111,7 @@ Configuration file Match * ``` -The `fluent-bit-parsers.conf` file, +The `fluent-bit-parsers.conf` file: ```text [PARSER] @@ -110,4 +121,3 @@ The `fluent-bit-parsers.conf` file, Time_Format %Y-%m-%dT%H:%M:%S %z Decode_Field_as escaped_utf8 log ``` - diff --git a/pipeline/parsers/regular-expression.md b/pipeline/parsers/regular-expression.md index 8cce3eeae..99deb4bb7 100644 --- a/pipeline/parsers/regular-expression.md +++ b/pipeline/parsers/regular-expression.md @@ -1,28 +1,38 @@ # Regular Expression -The **regex** parser allows to define a custom Ruby Regular Expression that will use a named capture feature to define which content belongs to which key name. +The **Regex** parser lets you define a custom Ruby regular expression that uses +a named capture feature to define which content belongs to which key name. -Fluent Bit uses [Onigmo](https://github.com/k-takata/Onigmo) regular expression library on Ruby mode, for testing purposes you can use the following web editor to test your expressions: +Use [Tail Multiline](../inputs/tail.md#multiline) when you need to support regexes +across multiple lines from a `tail`. The [Tail](../inputs/tail.md) input plugin +treats each line as a separate entity. -[http://rubular.com/](http://rubular.com/) +{% hint style="warning" %} +Security Warning: Onigmo is a backtracking regex engine. When using expensive +regex patterns Onigmo can take a long time to perform pattern matching. Read +["ReDoS"](https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS) +on OWASP for additional information. +{% end hint %} -Important: do not attempt to add multiline support in your regular expressions if you are using [Tail](../inputs/tail.md) input plugin since each line is handled as a separated entity. Instead use Tail [Multiline](../inputs/tail.md#multiline) support configuration feature. +Setting the format to **regex** requires a `regex` configuration key. -Security Warning: Onigmo is a _backtracking_ regex engine. 
You need to be careful not to use expensive regex patterns, or Onigmo can take very long time to perform pattern matching. For details, please read the article ["ReDoS"](https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS) on OWASP. - -> Note: understanding how regular expressions works is out of the scope of this content. +## Configuration Parameters -From a configuration perspective, when the format is set to **regex**, is mandatory and expected that a _Regex_ configuration key exists. +The regex parser supports the following configuration parameters: -## Configuration Parameters +| Key | Description | Default Value | +| --- | ----------- | ------------- | +| `Skip_Empty_Values` | If enabled, the parser ignores empty value of the record. | `True` | -The regex parser supports the following configuration parameters. +Fluent Bit uses the [Onigmo](https://github.com/k-takata/Onigmo) regular expression +library on Ruby mode. -|Key|Description|Default Value| -|-------|------------|--------| -|`Skip_Empty_Values`|If enabled, the parser ignores empty value of the record.| True| +You can use only alphanumeric characters and underscore in group names. For example, +a group name like `(?<user-name>.*)` causes an error due to the invalid dash (`-`) +character. Use the [Rubular](http://rubular.com/) web editor to test your expressions. -The following parser configuration example aims to provide rules that can be applied to an Apache HTTP Server log entry: +The following parser configuration example provides rules that can be applied to an +Apache HTTP Server log entry: ```python [PARSER] @@ -34,13 +44,14 @@ The following parser configuration example aims to provide rules that can be app Types code:integer size:integer ``` -As an example, takes the following Apache HTTP Server log entry: +As an example, review the following Apache HTTP Server log entry: ```text 192.168.2.20 - - [29/Jul/2015:10:27:10 -0300] "GET /cgi-bin/try/ HTTP/1.0" 200 3395 ``` -The above content do not provide a defined structure for Fluent Bit, but enabling the proper parser we can help to make a structured representation of it: +This log entry doesn't provide a defined structure for Fluent Bit. Enabling the +proper parser can help to make a structured representation of the entry: ```text [1154104030, {"host"=>"192.168.2.20", @@ -54,8 +65,3 @@ The above content do not provide a defined structure for Fluent Bit, but enablin } ] ``` - -A common pitfall is that you cannot use characters other than alphabets, numbers and underscore in group names. For example, a group name like `(?<user-name>.*)` will cause an error due to containing an invalid character \(`-`\). 
- -In order to understand, learn and test regular expressions like the example above, we suggest you try the following Ruby Regular Expression Editor: [http://rubular.com/r/X7BH0M4Ivm](http://rubular.com/r/X7BH0M4Ivm) - diff --git a/pipeline/pipeline-monitoring.md b/pipeline/pipeline-monitoring.md index 6d6a654a1..830624e5b 100644 --- a/pipeline/pipeline-monitoring.md +++ b/pipeline/pipeline-monitoring.md @@ -9,5 +9,4 @@ A Data Pipeline represents a flow of data that goes through the inputs \(sources * [HTTP Server: JSON and Prometheus Exporter-style metrics](../administration/monitoring.md#http-server) * [Grafana Dashboards and Alerts](../administration/monitoring.md#grafana-dashboard-and-alerts) * [Health Checks](../administration/monitoring.md#health-check-for-fluent-bit) -* [Calyptia Cloud: hosted service to monitor and visualize your pipelines](../administration/monitoring.md#calyptia-cloud) - +* [Telemetry Pipeline: hosted service to monitor and visualize your pipelines](../administration/monitoring.md#telemetry-pipeline) diff --git a/pipeline/processors/README.md b/pipeline/processors/README.md new file mode 100644 index 000000000..c1f055904 --- /dev/null +++ b/pipeline/processors/README.md @@ -0,0 +1,28 @@ +# Processors + +Processors are components that modify, transform, or enhance data as it flows +through Fluent Bit. Unlike [filters](../filters/README.md), processors are +tightly coupled to inputs, which means they execute immediately and avoid +creating a performance bottleneck. + +Additionally, filters can be implemented in a way that mimics the behavior of +processors, but processors can't be implemented in a way that mimics filters. + +## Available processors + +Fluent Bit offers the following processors: + +- [Content Modifier](content-modifier.md): Manipulate the content, metadata, and + attributes of logs and traces. +- [Labels](labels.md): Add, update, or delete metric labels. +- [Metrics Selector](metrics-selector.md): Choose which metrics to keep or discard. +- [OpenTelemetry Envelope](opentelemetry-envelope.md): Transform logs into an + OpenTelemetry-compatible format. +- [SQL](sql.md): Use SQL queries to extract log content. + +## Features + +Compatible processors include the following features: + +- [Conditional Processing](conditional-processing.md): Selectively apply processors + to logs based on the value of fields that those logs contain. diff --git a/pipeline/processors/conditional-processing.md b/pipeline/processors/conditional-processing.md new file mode 100644 index 000000000..c53d5f02c --- /dev/null +++ b/pipeline/processors/conditional-processing.md @@ -0,0 +1,246 @@ +# Conditional processing + +Conditional processing lets you selectively apply [processors](README.md) to +logs based on the value of fields that those logs contain. This feature lets you +create processing pipelines that only process records that meet certain +criteria, and ignore the rest. + +Conditional processing is available in Fluent Bit version 4.0 and greater. + +## Configuration + +You can turn a standard processor into a conditional processor by adding a +`condition` block to the processor's YAML configuration settings. + +{% hint style="info" %} +Conditional processing is only available for [YAML configuration files](../../administration/configuring-fluent-bit/yaml/README.md), not [classic configuration files](../../administration/configuring-fluent-bit/classic-mode/README.md). 
+{% endhint %} + + +These `condition` blocks use the following syntax: + +```yaml +pipeline: + inputs: + <...> + processors: + logs: + - name: {processor_name} + <...> + condition: + op: {and|or} + rules: + - field: {field_name1} + op: {comparison_operator} + value: {comparison_value1} + - field: {field_name2} + op: {comparison_operator} + value: {comparison_value2} + <...> +``` + +Each processor can only have a single `condition` block, but that condition can +include multiple rules. These rules are stored as items in the `condition.rules` +array. + +### Condition evaluation + +The `condition.op` parameter specifies the condition's evaluation logic. It has +two possible values: + +- `and`: A log entry meets this condition when all of the rules in the `condition.rules` + are [truthy](https://developer.mozilla.org/en-US/docs/Glossary/Truthy). +- `or`: A log entry meets this condition when one or more rules in the `condition.rules` + array are [truthy](https://developer.mozilla.org/en-US/docs/Glossary/Truthy). + +### Rules + +Each item in the `condition.rules` array must include values for the following parameters: + +| Parameter | Description | +| --- | --- | +| `field` | The field within your logs to evaluate. The value of this parameter must use [the correct syntax](#field-access) to access the fields inside logs. | +| `op` | The [comparison operator](#comparison-operators) to evaluate whether the rule is true. This parameter (`condition.rules.op`) is distinct from the `condition.op` parameter and has different possible values. | +| `value` | The value of the specified log field to use in your comparison. Optionally, you can provide [an array that contains multiple values](#array-of-values). | + +Rules are evaluated against each log that passes through your data pipeline. For example, given a rule with these parameters: + +``` +- field: "$status" + op: eq + value: 200 +``` + +This rule evaluates to `true` for a log that contains the string `'status':200`, but evaluates to `false` for a log that contains the string `'status':403`. + +#### Field access + +The `conditions.rules.field` parameter uses [record accessor syntax](/administration/configuring-fluent-bit/classic-mode/record-accessor.md) to reference fields inside logs. + +You can use `$field` syntax to access a top-level field, and `$field['child']['subchild']` to access nested fields. 
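+
+For example, for a record like `{"request": {"method": "POST", "headers": {"host": "example.com"}}}`, the rule field `$request['method']` resolves to `POST` and `$request['headers']['host']` resolves to `example.com`. The following is a minimal rule fragment using a nested field (the field names are only illustrative):
+
+```yaml
+condition:
+  op: and
+  rules:
+    - field: "$request['headers']['host']"
+      op: eq
+      value: "example.com"
+```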
+ +#### Comparison operators + +The `conditions.rules.op` parameter has the following possible values: + +- `eq`: equal to +- `neq`: not equal to +- `gt`: greater than +- `lt`: less than +- `gte`: greater than or equal to +- `lte`: less than or equal to +- `regex`: matches a regular expression +- `not_regex`: does not match a regular expression +- `in`: is included in the specified array +- `not_in`: is not included in the specified array + +## Examples + +### Basic condition + +This example applies a condition that only processes logs that contain the +string `{"request": {"method": "POST"`: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"request": {"method": "GET", "path": "/api/v1/resource"}}' + tag: request.log + processors: + logs: + - name: content_modifier + action: insert + key: modified_if_post + value: true + condition: + op: and + rules: + - field: "$request['method']" + op: eq + value: "POST" +``` + +### Multiple conditions with `and` + +This example applies a condition that only processes logs when all of the +specified rules are met: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"request": {"method": "POST", "path": "/api/v1/sensitive-data"}}' + tag: request.log + processors: + logs: + - name: content_modifier + action: insert + key: requires_audit + value: true + condition: + op: and + rules: + - field: "$request['method']" + op: eq + value: "POST" + - field: "$request['path']" + op: regex + value: "\/sensitive-.*" +``` + +### Multiple conditions with `or` + +This example applies a condition that only processes logs when one or more of +the specified rules are met: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"request": {"method": "GET", "path": "/api/v1/resource", "status_code": 200, "response_time": 150}}' + tag: request.log + processors: + logs: + - name: content_modifier + action: insert + key: requires_performance_check + value: true + condition: + op: or + rules: + - field: "$request['response_time']" + op: gt + value: 100 + - field: "$request['status_code']" + op: gte + value: 400 +``` + +### Array of values + +This example uses an array for the value of `condition.rules.value`: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"request": {"method": "GET", "path": "/api/v1/resource"}}' + tag: request.log + processors: + logs: + - name: content_modifier + action: insert + key: high_priority_method + value: true + condition: + op: and + rules: + - field: "$request['method']" + op: in + value: ["POST", "PUT", "DELETE"] +``` + +### Multiple processors with conditions + +This example uses multiple processors with conditional processing enabled for each: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"log": "Error: Connection refused", "level": "error", "service": "api-gateway"}' + tag: app.log + processors: + logs: + - name: content_modifier + action: insert + key: alert + value: true + condition: + op: and + rules: + - field: "$level" + op: eq + value: "error" + - field: "$service" + op: in + value: ["api-gateway", "authentication", "database"] + + - name: content_modifier + action: insert + key: paging_required + value: true + condition: + op: and + rules: + - field: "$log" + op: regex + value: "(?i)(connection refused|timeout|crash)" + - field: "$level" + op: in + value: ["error", "fatal"] +``` + +This configuration adds an `alert` field to error logs from critical services, +and adds a `paging_required` field to errors that contain specific critical patterns. 
diff --git a/pipeline/processors/content-modifier.md b/pipeline/processors/content-modifier.md
new file mode 100644
index 000000000..cb42784ea
--- /dev/null
+++ b/pipeline/processors/content-modifier.md
@@ -0,0 +1,234 @@
+# Content Modifier
+
+The **content_modifier** processor allows you to manipulate the messages, metadata/attributes and content of Logs and Traces.
+
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=ee1ad690-a3e9-434f-9635-3e53c670e96c" />
+
+Similar to the functionality exposed by filters, this processor presents a unified mechanism to perform such operations for data manipulation. The most significant difference is that processors perform better than filters, and when chaining them, there are no encoding/decoding performance penalties.
+
+Note that processors and this specific component can only be enabled using the new YAML configuration format. Classic mode configuration format doesn't support processors.
+
+## Contexts
+
+The processor works on top of what we call a __context__, meaning _the place_ where the content modification will happen. We provide different contexts to manipulate the desired information. The following contexts are available:
+
+| Context Name | Signal | Description |
+| -- | -- | -- |
+| `attributes` | Logs | Modify the attributes or metadata of a Log record. |
+| `body` | Logs | Modify the content of a Log record. |
+| `span_name` | Traces | Modify the name of a Span. |
+| `span_kind` | Traces | Modify the kind of a Span. |
+| `span_status` | Traces | Modify the status of a Span. |
+| `span_attributes` | Traces | Modify the attributes of a Span. |
+
+
+### OpenTelemetry Contexts
+
+In addition, we provide special contexts to operate on data that follows an __OpenTelemetry Log Schema__. All of them operate on shared data across a group of records:
+
+| Context Name | Signal | Description |
+| -- | -- | -- |
+| `otel_resource_attributes` | Logs | Modify the attributes of the Log Resource. |
+| `otel_scope_name` | Logs | Modify the name of a Log Scope. |
+| `otel_scope_version` | Logs | Modify the version of a Log Scope. |
+| `otel_scope_attributes` | Logs | Modify the attributes of a Log Scope. |
+
+> TIP: if your data doesn't follow the OpenTelemetry Log Schema and your backend or destination for your logs expects it to be in an OpenTelemetry schema, take a look at the processor called OpenTelemetry Envelope, which you can use in conjunction with this processor to transform your data to be compatible with the OpenTelemetry Log schema.
+
+## Configuration Parameters
+
+| Key | Description |
+| :---------- | :--- |
+| context | Specify the context where the modifications will happen (more details above). The following contexts are available: `attributes`, `body`, `span_name`, `span_kind`, `span_status`, `span_attributes`, `otel_resource_attributes`, `otel_scope_name`, `otel_scope_version`, `otel_scope_attributes`. |
+| key | Specify the name of the key that will be used to apply the modification. |
+| value | Based on the action type, `value` might be required and represent different things. Check the detailed information for the specific actions. |
+| pattern | Defines a regular expression pattern. This property is only used by the `extract` action. |
+| converted_type | Define the data type to perform the conversion. The available options are: `string`, `boolean`, `int` and `double`. |
+
+### Actions
+
+The actions specify the type of operation to run on top of a specific key or content from a Log or a Trace.
The following actions are available: + +| Action | Description | +| ------- | ------------------------------------------------------------ | +| `insert` | Insert a new key with a value into the target context. The `key` and `value` parameters are required. | +| `upsert` | Given a specific key with a value, the `upsert` operation will try to update the value of the key. If the key does not exist, the key will be created. The `key` and `value` parameters are required. | +| `delete` | Delete a key from the target context. The `key` parameter is required. | +| `rename` | Change the name of a key. The `value` set in the configuration will represent the new name. The `key` and `value` parameters are required. | +| `hash` | Replace the key value with a hash generated by the SHA-256 algorithm, the binary value generated is finally set as an hex string representation. The `key` parameter is required. | +| `extract` | Allows to extact the value of a single key as a list of key/value pairs. This action needs the configuration of a regular expression in the `pattern` property . The `key` and `pattern` parameters are required. For more details check the examples below. | +| `convert` | Convert the data type of a key value. The `key` and `converted_type` parameters are required. | + +#### Insert example + +The following example appends the key `color` with the value `blue` to the log stream. + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"key1": "123.4"}' + + processors: + logs: + - name: content_modifier + action: insert + key: "color" + value: "blue" + outputs: + - name : stdout + match: '*' + format: json_lines +``` + +#### Upsert example + +Update the value of `key1` and insert `key2`: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"key1": "123.4"}' + + processors: + logs: + - name: content_modifier + action: upsert + key: "key1" + value: "5678" + + - name: content_modifier + action: upsert + key: "key2" + value: "example" + + outputs: + - name : stdout + match: '*' + format: json_lines + +``` + + +#### Delete example + +Delete `key2` from the stream: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"key1": "123.4", "key2": "example"}' + + processors: + logs: + - name: content_modifier + action: delete + key: "key2" + + outputs: + - name : stdout + match: '*' + format: json_lines +``` + +#### Rename example + +Change the name of `key2` to `test`: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"key1": "123.4", "key2": "example"}' + + processors: + logs: + - name: content_modifier + action: rename + key: "key2" + value: "test" + + outputs: + - name : stdout + match: '*' + format: json_lines +``` + +#### Hash example + +Apply the SHA-256 algorithm for the value of the key `password`: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"username": "bob", "password": "12345"}' + + processors: + logs: + - name: content_modifier + action: hash + key: "password" + + outputs: + - name : stdout + match: '*' + format: json_lines +``` + + + +#### Extract example + +By using a domain address, perform a extraction of the components of it as a list of key value pairs: + +```yaml +pipeline: + inputs: + - name: dummy + dummy: '{"http.url": "https://fluentbit.io/docs?q=example"}' + + processors: + logs: + - name: content_modifier + action: extract + key: "http.url" + pattern: ^(?<http_protocol>https?):\/\/(?<http_domain>[^\/\?]+)(?<http_path>\/[^?]*)?(?:\?(?<http_query_params>.*))? 
+
+#### Convert example
+
+Both keys in the example are strings. Convert `key1` to a double/float type and `key2` to a boolean:
+
+```yaml
+pipeline:
+  inputs:
+    - name: dummy
+      dummy: '{"key1": "123.4", "key2": "true"}'
+
+      processors:
+        logs:
+          - name: content_modifier
+            action: convert
+            key: key1
+            converted_type: double
+
+          - name: content_modifier
+            action: convert
+            key: key2
+            converted_type: boolean
+
+  outputs:
+    - name: stdout
+      match: '*'
+      format: json_lines
+```
diff --git a/pipeline/processors/labels.md b/pipeline/processors/labels.md
new file mode 100644
index 000000000..cc1c663a0
--- /dev/null
+++ b/pipeline/processors/labels.md
@@ -0,0 +1,111 @@
+# Labels
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=1e9a2474-00c3-4d8d-b170-79996be7af79" />
+
+The **labels** processor lets you manipulate the labels of metrics.
+
+Similar to filters, this processor presents an enriching/modifying mechanism to perform operations for label manipulation. The most significant difference is that processors perform better than filters, and when chaining them there are no encoding or decoding performance penalties.
+
+{% hint style="info" %}
+**Note:** Both processors and this specific component can be enabled only by using the YAML configuration format. The classic configuration format doesn't support processors.
+{% endhint %}
+
+## Configuration Parameters
+
+| Key | Description |
+| :----- | :---------- |
+| update | Update an existing key with a value in the labels of metrics. The key/value pair is required. If the specified key doesn't exist, the operation silently fails and has no effect. |
+| insert | Insert a new key with a value into the labels of metrics. The key/value pair is required. |
+| upsert | Update the value of an existing key. If the key doesn't exist, it will be created. The key/value pair is required. |
+| delete | Delete a key from the labels of metrics. The key name is required. If the specified key doesn't exist, the operation silently fails and has no effect. |
+| hash | Replace the value of the specified label with a hash generated by the SHA-256 algorithm. The generated binary value is set as a hex string. |
+
+#### Update example
+
+Change the value of the `name` label to `fluentbit`:
+
+```yaml
+pipeline:
+  inputs:
+    - name: fluentbit_metrics
+      processors:
+        metrics:
+          - name: labels
+            update: name fluentbit
+  outputs:
+    - name: stdout
+      match: '*'
+```
+
+#### Insert example
+
+The following example appends the key `agent` with the value `fluentbit` as a label of the metrics:
+
+```yaml
+pipeline:
+  inputs:
+    - name: fluentbit_metrics
+      processors:
+        metrics:
+          - name: labels
+            insert: agent fluentbit
+  outputs:
+    - name: stdout
+      match: '*'
+```
+
+#### Upsert example
+
+Upsert the label `name` with the value `fluentbit`:
+
+```yaml
+pipeline:
+  inputs:
+    - name: fluentbit_metrics
+      processors:
+        metrics:
+          - name: labels
+            upsert: name fluentbit
+  outputs:
+    - name: stdout
+      match: '*'
+```
+
+#### Delete example
+
+Delete the `name` label from the metrics:
+
+```yaml
+pipeline:
+  inputs:
+    - name: fluentbit_metrics
+      processors:
+        metrics:
+          - name: labels
+            delete: name
+  outputs:
+    - name: stdout
+      match: '*'
+```
+
+#### Hash example
+
+Apply the SHA-256 algorithm to the value of the label `hostname`:
+
+```yaml
+pipeline:
+  inputs:
+    - name: fluentbit_metrics
+      processors:
+        metrics:
+          - name: labels
+            hash: hostname
+  outputs:
+    - name: stdout
+      match: '*'
+```
diff --git a/pipeline/processors/metrics-selector.md b/pipeline/processors/metrics-selector.md
new file mode 100644
index 000000000..262075f9a
--- /dev/null
+++ b/pipeline/processors/metrics-selector.md
@@ -0,0 +1,93 @@
+# Metrics Selector
+
+The **metrics_selector** processor allows you to select metrics to include or exclude (similar to the `grep` filter for logs).
+
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=326269f3-cfea-472d-9169-1de32c142b90" />
+
+## Configuration Parameters <a id="config"></a>
+
+The native processor plugin supports the following configuration parameters:
+
+| Key | Description | Default |
+| :---------- | :--- | :--- |
+| Metric\_Name | Keep metrics whose name matches the specified name or regular expression. | |
+| Context | Specify the matching context. Currently, only `metric_name` and `delete_label_value` are supported. | `Metrics_Name` |
+| Action | Specify the action to apply to matched metrics. `INCLUDE` and `EXCLUDE` are allowed. | |
+| Operation\_Type | Specify the matching operation type for metric names. `PREFIX` and `SUBSTRING` are allowed. | |
+| Label | Specify a label key and value pair. | |
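+
+When `metric_name` isn't wrapped in slashes (`/.../`), it's treated as a plain string and matched by prefix or substring according to `operation_type` (see the note after the examples). The following is a hedged sketch, using the lowercase parameter casing of the other YAML examples on this page, that keeps only metrics whose names contain the substring `storage`:
+
+```yaml
+pipeline:
+  inputs:
+    - name: fluentbit_metrics
+      tag: fluentbit.metrics
+      scrape_interval: 10
+
+      processors:
+        metrics:
+          # Without slashes, "storage" is a plain string; operation_type
+          # selects substring matching instead of the default prefix matching.
+          - name: metrics_selector
+            metric_name: storage
+            operation_type: substring
+            action: include
+
+  outputs:
+    - name: stdout
+      match: '*'
+```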
+
+## Configuration Examples <a id="config_example"></a>
+
+Here is a basic configuration example:
+
+{% tabs %}
+{% tab title="fluent-bit.yaml" %}
+```yaml
+service:
+  flush: 5
+  daemon: off
+  log_level: info
+
+pipeline:
+  inputs:
+    - name: fluentbit_metrics
+      tag: fluentbit.metrics
+      scrape_interval: 10
+
+      processors:
+        metrics:
+          - name: metrics_selector
+            metric_name: /storage/
+            action: include
+
+          - name: metrics_selector
+            metric_name: /fs/
+            action: exclude
+
+          - name: labels
+            delete: name
+
+  outputs:
+    - name: stdout
+      match: '*'
+```
+{% endtab %}
+
+{% tab title="context-delete_label_value.yaml" %}
+```yaml
+service:
+  flush: 5
+  daemon: off
+  log_level: info
+
+pipeline:
+  inputs:
+    - name: fluentbit_metrics
+      tag: fluentbit.metrics
+      scrape_interval: 10
+
+      processors:
+        metrics:
+          - name: metrics_selector
+            context: delete_label_value
+            label: name stdout.0
+
+          - name: labels
+            delete: name
+
+  outputs:
+    - name: stdout
+      match: '*'
+```
+{% endtab %}
+{% endtabs %}
+
+All processors are only valid with the YAML configuration format. Processor configuration should be located under the relevant input or output plugin configuration.
+
+The `metric_name` parameter treats strings wrapped in slashes (`/.../`) as regular expressions; for example, `/chunks/` is interpreted as a regular expression. Without the slashes, use `operation_type` to choose between prefix matching and substring matching. The default operation is prefix matching.
diff --git a/pipeline/processors/opentelemetry-envelope.md b/pipeline/processors/opentelemetry-envelope.md
new file mode 100644
index 000000000..f9df45a3c
--- /dev/null
+++ b/pipeline/processors/opentelemetry-envelope.md
@@ -0,0 +1,165 @@
+# OpenTelemetry Envelope
+
+The _OpenTelemetry Envelope_ processor transforms your data to be compatible with the OpenTelemetry Log schema. Use it when your data was __not__ generated by the [OpenTelemetry input](../inputs/opentelemetry.md) but your backend or log destination expects logs in the OpenTelemetry schema.
+
+## Configuration Parameters
+
+The processor doesn't provide any extra configuration parameters; it can be used directly in your _processors_ YAML directive.
+
+## Usage Example
+
+In this example, the Dummy input plugin generates a sample message per second. Right after the message is created, the `opentelemetry_envelope` processor transforms the data to be compatible with the OpenTelemetry Log schema. The output is sent to the standard output and also to an OpenTelemetry collector that listens on port 4318.
+
+__fluent-bit.yaml__
+
+```yaml
+service:
+  flush: 1
+  log_level: info
+
+pipeline:
+  inputs:
+    - name: dummy
+      dummy: '{"message": "Hello World"}'
+
+      processors:
+        logs:
+          - name: opentelemetry_envelope
+
+  outputs:
+    - name: stdout
+      match: '*'
+
+    - name: opentelemetry
+      match: '*'
+      host: 127.0.0.1
+      port: 4318
+```
+
+__otel-collector.yaml__
+
+```yaml
+receivers:
+  otlp:
+    protocols:
+      http:
+        endpoint: 127.0.0.1:4318
+
+exporters:
+  file:
+    path: out.json
+  logging:
+    loglevel: info
+
+service:
+  telemetry:
+    logs:
+      level: debug
+  pipelines:
+    logs:
+      receivers: [otlp]
+      exporters: [file, logging]
+```
+
+You will notice that the standard output of Fluent Bit prints the raw representation of the schema, while the OpenTelemetry collector receives the data in the OpenTelemetry Log schema.
+
+Inspecting the output file `out.json`, you will see the data in the OpenTelemetry Log schema:
+
+```json
+{
+  "resourceLogs": [
+    {
+      "resource": {},
+      "scopeLogs": [
+        {
+          "scope": {},
+          "logRecords": [
+            {
+              "timeUnixNano": "1722904188085758000",
+              "body": {
+                "stringValue": "dummy"
+              },
+              "traceId": "",
+              "spanId": ""
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
+```
+
+While the OpenTelemetry Envelope processor enriches your logs with the schema, you might want to take a step further and use the [Content Modifier](../processors/content-modifier.md) processor to modify the content of your logs. Here is a quick example that adds a resource attribute to your logs:
+
+```yaml
+service:
+  flush: 1
+  log_level: info
+
+pipeline:
+  inputs:
+    - name: dummy
+      dummy: '{"message": "Hello World"}'
+
+      processors:
+        logs:
+          - name: opentelemetry_envelope
+
+          - name: content_modifier
+            context: otel_resource_attributes
+            action: upsert
+            key: service.name
+            value: my-service
+
+  outputs:
+    - name: stdout
+      match: '*'
+
+    - name: opentelemetry
+      match: '*'
+      host: 127.0.0.1
+      port: 4318
+```
+
+The collector JSON output will look like this:
+
+```json
+{
+  "resourceLogs": [
+    {
+      "resource": {
+        "attributes": [
+          {
+            "key": "service.name",
+            "value": {
+              "stringValue": "my-service"
+            }
+          }
+        ]
+      },
+      "scopeLogs": [
+        {
+          "scope": {},
+          "logRecords": [
+            {
+              "timeUnixNano": "1722904465173450000",
+              "body": {
+                "stringValue": "Hello World"
+              },
+              "traceId": "",
+              "spanId": ""
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
+```
+
+For more details about further processing, read the [Content Modifier](../processors/content-modifier.md) processor documentation.
diff --git a/pipeline/processors/sql.md b/pipeline/processors/sql.md
new file mode 100644
index 000000000..47482f80a
--- /dev/null
+++ b/pipeline/processors/sql.md
@@ -0,0 +1,74 @@
+# Structured Query Language (SQL)
+
+The **sql** processor provides a simple interface to select content from Logs, with support for conditional expressions.
+
+<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=6bd80893-c66f-4950-9e6d-c21358e9e8c9" />
+
+The SQL processor doesn't depend on a database or indexing; it runs everything on the fly. There is no concept of tables; instead, you run the query against the STREAM.
+
+Note that this processor differs from the stream processor interface that runs after the filters; this one can only be used in the `processors` section of an input plugin when using the YAML configuration format.
+
+## Configuration Parameters
+
+| Key | Description |
+| :---------- | :--- |
+| query | Define the SQL statement to run on top of the Logs stream; it must end with `;`. |
+
+### Simple selection example
+
+The following example generates a sample message with two keys, `key1` and `http.url`. Using a simple SQL statement, we select only the key `http.url`:
+
+```yaml
+pipeline:
+  inputs:
+    - name: dummy
+      dummy: '{"key1": "123.4", "http.url": "https://fluentbit.io/search?q=docs"}'
+
+      processors:
+        logs:
+          - name: sql
+            query: "SELECT http.url FROM STREAM;"
+
+  outputs:
+    - name: stdout
+      match: '*'
+      format: json_lines
+```
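+
+The expected output keeps only the selected key. This is a hedged illustration; the `date` value will differ on your system:
+
+```json
+{
+  "date": 1711059261.630668,
+  "http.url": "https://fluentbit.io/search?q=docs"
+}
+```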
+
+### Extract and select example
+
+Similar to the previous example, this pipeline extracts the parts of `http.url` and selects only the domain from the value. To do that, it uses the content_modifier and sql processors together:
+
+```yaml
+pipeline:
+  inputs:
+    - name: dummy
+      dummy: '{"key1": "123.4", "http.url": "https://fluentbit.io/search?q=docs"}'
+
+      processors:
+        logs:
+          - name: content_modifier
+            action: extract
+            key: "http.url"
+            pattern: ^(?<http_protocol>https?):\/\/(?<http_domain>[^\/\?]+)(?<http_path>\/[^?]*)?(?:\?(?<http_query_params>.*))?
+
+          - name: sql
+            query: "SELECT http_domain FROM STREAM;"
+
+  outputs:
+    - name: stdout
+      match: '*'
+      format: json_lines
+```
+
+The expected output of this pipeline will be something like this:
+
+```json
+{
+  "date": 1711059261.630668,
+  "http_domain": "fluentbit.io"
+}
+```
diff --git a/vale-styles/FluentBit/AMPM.yml b/vale-styles/FluentBit/AMPM.yml
new file mode 100644
index 000000000..5b696b3b8
--- /dev/null
+++ b/vale-styles/FluentBit/AMPM.yml
@@ -0,0 +1,10 @@
+
+extends: existence
+message: "Use 'AM' or 'PM' (preceded by a space)."
+link: 'https://developers.google.com/style/word-list'
+level: suggestion
+nonword: true
+tokens:
+  - '\d{1,2}[AP]M'
+  - '\d{1,2} ?[ap]m'
+  - '\d{1,2} ?[aApP]\.[mM]\.'
\ No newline at end of file
diff --git a/vale-styles/FluentBit/Acronyms.yml b/vale-styles/FluentBit/Acronyms.yml
new file mode 100644
index 000000000..19936b25c
--- /dev/null
+++ b/vale-styles/FluentBit/Acronyms.yml
@@ -0,0 +1,95 @@
+extends: conditional
+message: "Spell out '%s', if it's unfamiliar to the audience."
+link: 'https://developers.google.com/style/abbreviations'
+level: suggestion
+ignorecase: false
+# Ensures that the existence of 'first' implies the existence of 'second'.
+first: '\b([A-Z]{3,5})\b'
+second: '(?:\b[A-Z][a-z]+ )+\(([A-Z]{3,5})\)'
+# ... with the exception of these:
+exceptions:
+  - ACL
+  - API
+  - ARN
+  - ASC
+  - ASP
+  - AWS
+  - CIDR
+  - CLI
+  - CPU
+  - CRD
+  - CSS
+  - CSV
+  - DEBUG
+  - DESC
+  - DOM
+  - DNS
+  - DPI
+  - DPPS
+  - FAQ
+  - FIPS
+  - GCC
+  - GCP
+  - GDB
+  - GET
+  - GNU
+  - GPG
+  - GPU
+  - GTK
+  - GUI
+  - GZIP
+  - HPA
+  - IAM
+  - HTML
+  - HTTP
+  - HTTPS
+  - IDE
+  - JAR
+  - JSON
+  - JSX
+  - LESS
+  - LLDB
+  - LTS
+  - NET
+  - NOTE
+  - NVDA
+  - OSS
+  - PATH
+  - PEM
+  - PDF
+  - PHP
+  - POSIX
+  - POST
+  - RAM
+  - REPL
+  - REST
+  - RHEL
+  - RPC
+  - RSA
+  - SASL
+  - SCM
+  - SCSS
+  - SDK
+  - SIEM
+  - SLA
+  - SQL
+  - SSH
+  - SSL
+  - SSO
+  - SVG
+  - TBD
+  - TCP
+  - TLS
+  - TRE
+  - TODO
+  - UDP
+  - URI
+  - URL
+  - USB
+  - UTC
+  - UTF
+  - UUID
+  - XML
+  - XSS
+  - YAML
+  - ZIP
diff --git a/vale-styles/FluentBit/AmSpelling.yml b/vale-styles/FluentBit/AmSpelling.yml
new file mode 100644
index 000000000..9a8ea2e30
--- /dev/null
+++ b/vale-styles/FluentBit/AmSpelling.yml
@@ -0,0 +1,8 @@
+extends: existence
+message: "In general, use American spelling instead of '%s'."
+link: 'https://developers.google.com/style/spelling'
+ignorecase: true
+level: suggestion
+tokens:
+  - '(?:\w+)nised?'
+ - '(?:\w+)logue' \ No newline at end of file diff --git a/vale-styles/FluentBit/Ampersand.yml b/vale-styles/FluentBit/Ampersand.yml new file mode 100644 index 000000000..75117bc08 --- /dev/null +++ b/vale-styles/FluentBit/Ampersand.yml @@ -0,0 +1,9 @@ +--- +extends: existence +message: "Don't use an ampersand in place of the word 'and'. Always write out 'and' unless the ampersand is part of a proper name." +nonword: true +ignorecase: false +level: suggestion +scope: sentence +tokens: + - '[^\*{2}].*.&.*[^\*{2}]\n' diff --git a/vale-styles/FluentBit/Colons.yml b/vale-styles/FluentBit/Colons.yml new file mode 100644 index 000000000..aee9281c3 --- /dev/null +++ b/vale-styles/FluentBit/Colons.yml @@ -0,0 +1,8 @@ +extends: existence +message: "'%s' should be in lowercase." +link: 'https://developers.google.com/style/colons' +nonword: true +level: suggestion +scope: sentence +tokens: + - ':\s[A-Z]' \ No newline at end of file diff --git a/vale-styles/FluentBit/Contractions.yml b/vale-styles/FluentBit/Contractions.yml new file mode 100644 index 000000000..525687ca6 --- /dev/null +++ b/vale-styles/FluentBit/Contractions.yml @@ -0,0 +1,30 @@ +extends: substitution +message: "Feel free to use '%s' instead of '%s'." +link: 'https://developers.google.com/style/contractions' +level: suggestion +ignorecase: true +action: + name: replace +swap: + are not: aren't + cannot: can't + could not: couldn't + did not: didn't + do not: don't + does not: doesn't + has not: hasn't + have not: haven't + how is: how's + is not: isn't + it is: it's + should not: shouldn't + that is: that's + they are: they're + was not: wasn't + we are: we're + we have: we've + were not: weren't + what is: what's + when is: when's + where is: where's + will not: won't \ No newline at end of file diff --git a/vale-styles/FluentBit/DateFormat.yml b/vale-styles/FluentBit/DateFormat.yml new file mode 100644 index 000000000..c70b3b7bc --- /dev/null +++ b/vale-styles/FluentBit/DateFormat.yml @@ -0,0 +1,9 @@ +extends: existence +message: "Use 'July 31, 2016' format, not '%s'." +link: 'https://developers.google.com/style/dates-times' +ignorecase: true +level: suggestion +nonword: true +tokens: + - '\d{1,2}(?:\.|/)\d{1,2}(?:\.|/)\d{4}' + - '\d{1,2} (?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)|May|Jun(?:e)|Jul(?:y)|Aug(?:ust)|Sep(?:tember)?|Oct(?:ober)|Nov(?:ember)?|Dec(?:ember)?) \d{4}' \ No newline at end of file diff --git a/vale-styles/FluentBit/Directional.yml b/vale-styles/FluentBit/Directional.yml new file mode 100644 index 000000000..1d18a0f4d --- /dev/null +++ b/vale-styles/FluentBit/Directional.yml @@ -0,0 +1,8 @@ +--- +extends: existence +message: "Verify your use of '%s' with the Style Guide." +level: suggestion +ignorecase: true +tokens: + - above + - below diff --git a/vale-styles/FluentBit/DontUse.yml b/vale-styles/FluentBit/DontUse.yml new file mode 100644 index 000000000..8308555d7 --- /dev/null +++ b/vale-styles/FluentBit/DontUse.yml @@ -0,0 +1,18 @@ +--- +extends: existence +message: "We don't use '%s'." +ignorecase: true +level: suggestion +tokens: + - a.k.a. + - aka + - and/or + - at this point + - desire + - it is recommended that + - just + - note that + - please + - quite + - such that + - thus diff --git a/vale-styles/FluentBit/Drilldown.yml b/vale-styles/FluentBit/Drilldown.yml new file mode 100644 index 000000000..ca7cb2ed7 --- /dev/null +++ b/vale-styles/FluentBit/Drilldown.yml @@ -0,0 +1,8 @@ +--- +extends: sequence +message: "Use drilldown as an adjective or noun." 
+level: suggestion +ignorecase: true +tokens: + - tag: NN|JJ + pattern: '(?:drill down|drill-down)' diff --git a/vale-styles/FluentBit/DrilldownVerb.yml b/vale-styles/FluentBit/DrilldownVerb.yml new file mode 100644 index 000000000..3dd6ca047 --- /dev/null +++ b/vale-styles/FluentBit/DrilldownVerb.yml @@ -0,0 +1,8 @@ +--- +extends: sequence +message: "Use drill down as a verb." +level: suggestion +ignorecase: true +tokens: + - tag: VB|VBD|VBG|VBN|VBP|VBZ + pattern: '(?:drilldown|drill-down)' diff --git a/vale-styles/FluentBit/Ellipses.yml b/vale-styles/FluentBit/Ellipses.yml new file mode 100644 index 000000000..6b93a0c44 --- /dev/null +++ b/vale-styles/FluentBit/Ellipses.yml @@ -0,0 +1,9 @@ +extends: existence +message: "In general, don't use an ellipsis." +link: 'https://developers.google.com/style/ellipses' +nonword: true +level: suggestion +action: + name: remove +tokens: + - '\.\.\.' \ No newline at end of file diff --git a/vale-styles/FluentBit/EmDash.yml b/vale-styles/FluentBit/EmDash.yml new file mode 100644 index 000000000..c0de45e89 --- /dev/null +++ b/vale-styles/FluentBit/EmDash.yml @@ -0,0 +1,12 @@ +extends: existence +message: "Don't put a space before or after a dash." +link: 'https://developers.google.com/style/dashes' +nonword: true +level: suggestion +action: + name: edit + params: + - remove + - ' ' +tokens: + - '\s[—–]\s' \ No newline at end of file diff --git a/vale-styles/FluentBit/EnDash.yml b/vale-styles/FluentBit/EnDash.yml new file mode 100644 index 000000000..e69e4bcbb --- /dev/null +++ b/vale-styles/FluentBit/EnDash.yml @@ -0,0 +1,13 @@ +extends: existence +message: "Use an em dash ('—') instead of '–'." +link: 'https://developers.google.com/style/dashes' +nonword: true +level: suggestion +action: + name: edit + params: + - replace + - '-' + - '—' +tokens: + - '–' \ No newline at end of file diff --git a/vale-styles/FluentBit/Exclamation.yml b/vale-styles/FluentBit/Exclamation.yml new file mode 100644 index 000000000..b77798361 --- /dev/null +++ b/vale-styles/FluentBit/Exclamation.yml @@ -0,0 +1,7 @@ +extends: existence +message: "Don't use exclamation points in text." +link: 'https://developers.google.com/style/exclamation-points' +nonword: true +level: suggestion +tokens: + - '\w!(?:\s|$)' diff --git a/vale-styles/FluentBit/FirstPerson.yml b/vale-styles/FluentBit/FirstPerson.yml new file mode 100644 index 000000000..e8793eddf --- /dev/null +++ b/vale-styles/FluentBit/FirstPerson.yml @@ -0,0 +1,13 @@ +extends: existence +message: "Avoid first-person pronouns such as '%s'." +link: 'https://developers.google.com/style/pronouns#personal-pronouns' +ignorecase: true +level: suggestion +nonword: true +tokens: + - (?:^|\s)I\s + - (?:^|\s)I,\s + - \bI'm\b + - \bme\b + - \bmy\b + - \bmine\b \ No newline at end of file diff --git a/vale-styles/FluentBit/FutureTense.yml b/vale-styles/FluentBit/FutureTense.yml new file mode 100644 index 000000000..17d1bd6a6 --- /dev/null +++ b/vale-styles/FluentBit/FutureTense.yml @@ -0,0 +1,10 @@ +--- +extends: existence +message: "'%s' might be in future tense. Strive for active voice and present tense in your documentation." 
+ignorecase: true +level: suggestion +raw: + - "(going to( |\n|[[:punct:]])[a-zA-Z]*|" + - "will( |\n|[[:punct:]])[a-zA-Z]*|" + - "won't( |\n|[[:punct:]])[a-zA-Z]*|" + - "[a-zA-Z]*'ll( |\n|[[:punct:]])[a-zA-Z]*)" diff --git a/vale-styles/FluentBit/Gender.yml b/vale-styles/FluentBit/Gender.yml new file mode 100644 index 000000000..9b5689ebd --- /dev/null +++ b/vale-styles/FluentBit/Gender.yml @@ -0,0 +1,9 @@ +extends: existence +message: "Don't use '%s' as a gender-neutral pronoun." +link: 'https://developers.google.com/style/pronouns#gender-neutral-pronouns' +level: suggestion +ignorecase: true +tokens: + - he/she + - s/he + - \(s\)he \ No newline at end of file diff --git a/vale-styles/FluentBit/GenderBias.yml b/vale-styles/FluentBit/GenderBias.yml new file mode 100644 index 000000000..3a3b6985e --- /dev/null +++ b/vale-styles/FluentBit/GenderBias.yml @@ -0,0 +1,45 @@ +extends: substitution +message: "Consider using '%s' instead of '%s'." +link: 'https://developers.google.com/style/inclusive-documentation' +ignorecase: true +level: suggestion +swap: + (?:alumna|alumnus): graduate + (?:alumnae|alumni): graduates + air(?:m[ae]n|wom[ae]n): pilot(s) + anchor(?:m[ae]n|wom[ae]n): anchor(s) + authoress: author + camera(?:m[ae]n|wom[ae]n): camera operator(s) + chair(?:m[ae]n|wom[ae]n): chair(s) + congress(?:m[ae]n|wom[ae]n): member(s) of congress + door(?:m[ae]|wom[ae]n): concierge(s) + draft(?:m[ae]n|wom[ae]n): drafter(s) + fire(?:m[ae]n|wom[ae]n): firefighter(s) + fisher(?:m[ae]n|wom[ae]n): fisher(s) + fresh(?:m[ae]n|wom[ae]n): first-year student(s) + garbage(?:m[ae]n|wom[ae]n): waste collector(s) + lady lawyer: lawyer + ladylike: courteous + landlord: building manager + mail(?:m[ae]n|wom[ae]n): mail carriers + man and wife: husband and wife + man enough: strong enough + mankind: human kind + manmade: manufactured + manpower: personnel + men and girls: men and women + middle(?:m[ae]n|wom[ae]n): intermediary + news(?:m[ae]n|wom[ae]n): journalist(s) + ombuds(?:man|woman): ombuds + oneupmanship: upstaging + poetess: poet + police(?:m[ae]n|wom[ae]n): police officer(s) + repair(?:m[ae]n|wom[ae]n): technician(s) + sales(?:m[ae]n|wom[ae]n): salesperson or sales people + service(?:m[ae]n|wom[ae]n): soldier(s) + steward(?:ess)?: flight attendant + tribes(?:m[ae]n|wom[ae]n): tribe member(s) + waitress: waiter + woman doctor: doctor + woman scientist[s]?: scientist(s) + work(?:m[ae]n|wom[ae]n): worker(s) \ No newline at end of file diff --git a/vale-styles/FluentBit/HeadingPunctuation.yml b/vale-styles/FluentBit/HeadingPunctuation.yml new file mode 100644 index 000000000..659260b27 --- /dev/null +++ b/vale-styles/FluentBit/HeadingPunctuation.yml @@ -0,0 +1,13 @@ +extends: existence +message: "Don't put a period at the end of a heading." +link: 'https://developers.google.com/style/capitalization#capitalization-in-titles-and-headings' +nonword: true +level: suggestion +scope: heading +action: + name: edit + params: + - remove + - '.' +tokens: + - '[a-z0-9][.]\s*$' \ No newline at end of file diff --git a/vale-styles/FluentBit/Headings.yml b/vale-styles/FluentBit/Headings.yml new file mode 100644 index 000000000..2be26b3b3 --- /dev/null +++ b/vale-styles/FluentBit/Headings.yml @@ -0,0 +1,73 @@ +extends: capitalization +message: "'%s' should use sentence-style capitalization." 
+link: 'https://developers.google.com/style/capitalization#capitalization-in-titles-and-headings' +level: suggestion +scope: heading +match: $sentence +indicators: + - ':' +exceptions: + - Amazon + - Amazon CloudWatch + - Amazon Kinesis Firehose + - Amazon Kinesis Streams + - API + - APIs + - Azure + - BuildKite + - CircleCI + - CLI + - CloudWatch + - Code + - Collector + - Cosmos + - Crowdstrike + - cURL + - Datadog + - Docker + - DogStatsD + - Elastic Cloud + - Emmet + - EventBridge + - Fluent Bit + - GCP + - GitLab + - GitHub + - Google + - Google Cloud + - Google Cloud Platform + - Grafana + - gRPC + - I + - InfluxDB + - Kinesis + - Kubernetes + - LaunchDarkly + - Linux + - macOS + - Marketplace + - MongoDB + - New Relic + - Observability Platform + - Okta + - OpenMetrics + - OpenTelemetry + - Opsgenie + - PagerDuty + - Prometheus + - PromQL + - REPL + - ServiceMonitor + - SignalFx + - Slack + - StatsD + - Studio + - Tanzu + - Telemetry Pipeline + - Terraform + - TypeScript + - URLs + - VictorOps + - Visual + - VS + - Windows diff --git a/vale-styles/FluentBit/Latin.yml b/vale-styles/FluentBit/Latin.yml new file mode 100644 index 000000000..ca6f3abb7 --- /dev/null +++ b/vale-styles/FluentBit/Latin.yml @@ -0,0 +1,17 @@ +extends: substitution +message: "Use '%s' instead of '%s'." +link: 'https://developers.google.com/style/abbreviations' +ignorecase: true +level: suggestion +nonword: true +action: + name: replace +swap: + '\b(?:eg|e\.g\.)[\s,]': for example + '\b(?:ie|i\.e\.)[\s,]': that is + 'ad-hoc': if needed + '[\s]et al[\s]': and others + '[\s]etc[\s|.]': and so on + '[\s]via[\s]': through or by using + 'vice versa': and the reverse + '[\s]vs[\s|.]': versus diff --git a/vale-styles/FluentBit/LyHyphens.yml b/vale-styles/FluentBit/LyHyphens.yml new file mode 100644 index 000000000..97e092a33 --- /dev/null +++ b/vale-styles/FluentBit/LyHyphens.yml @@ -0,0 +1,14 @@ +extends: existence +message: "'%s' doesn't need a hyphen." +link: 'https://developers.google.com/style/hyphens' +level: suggestion +ignorecase: false +nonword: true +action: + name: edit + params: + - replace + - '-' + - ' ' +tokens: + - '\s[^\s-]+ly-' \ No newline at end of file diff --git a/vale-styles/FluentBit/MayMightCan.yml b/vale-styles/FluentBit/MayMightCan.yml new file mode 100644 index 000000000..56ce95f06 --- /dev/null +++ b/vale-styles/FluentBit/MayMightCan.yml @@ -0,0 +1,7 @@ +--- +extends: existence +message: "Use 'can' for permissions or 'might' for possibility." +level: suggestion +ignorecase: true +tokens: + - may diff --git a/vale-styles/FluentBit/NonStandardQuotes.yml b/vale-styles/FluentBit/NonStandardQuotes.yml new file mode 100644 index 000000000..40feaafb3 --- /dev/null +++ b/vale-styles/FluentBit/NonStandardQuotes.yml @@ -0,0 +1,8 @@ +--- +extends: existence +message: 'Use standard single quotes or double quotes only. Do not use left or right quotes.' +level: suggestion +ignorecase: true +scope: raw +raw: + - '[‘’“”]' diff --git a/vale-styles/FluentBit/OptionalPlurals.yml b/vale-styles/FluentBit/OptionalPlurals.yml new file mode 100644 index 000000000..50bf2c247 --- /dev/null +++ b/vale-styles/FluentBit/OptionalPlurals.yml @@ -0,0 +1,12 @@ +extends: existence +message: "Don't use plurals in parentheses such as in '%s'." 
+link: 'https://developers.google.com/style/plurals-parentheses' +level: suggestion +nonword: true +action: + name: edit + params: + - remove + - '(s)' +tokens: + - '\b\w+\(s\)' diff --git a/vale-styles/FluentBit/Ordinal.yml b/vale-styles/FluentBit/Ordinal.yml new file mode 100644 index 000000000..cd836d5a5 --- /dev/null +++ b/vale-styles/FluentBit/Ordinal.yml @@ -0,0 +1,7 @@ +extends: existence +message: "Spell out all ordinal numbers ('%s') in text." +link: 'https://developers.google.com/style/numbers' +level: suggestion +nonword: true +tokens: + - \d+(?:st|nd|rd|th) \ No newline at end of file diff --git a/vale-styles/FluentBit/Passive.yml b/vale-styles/FluentBit/Passive.yml new file mode 100644 index 000000000..b52c01204 --- /dev/null +++ b/vale-styles/FluentBit/Passive.yml @@ -0,0 +1,184 @@ +extends: existence +link: 'https://developers.google.com/style/voice' +message: "In general, use active voice instead of passive voice ('%s')." +ignorecase: true +level: suggestion +raw: + - \b(am|are|were|being|is|been|was|be)\b\s* +tokens: + - '[\w]+ed' + - awoken + - beat + - become + - been + - begun + - bent + - beset + - bet + - bid + - bidden + - bitten + - bled + - blown + - born + - bought + - bound + - bred + - broadcast + - broken + - brought + - built + - burnt + - burst + - cast + - caught + - chosen + - clung + - come + - cost + - crept + - cut + - dealt + - dived + - done + - drawn + - dreamt + - driven + - drunk + - dug + - eaten + - fallen + - fed + - felt + - fit + - fled + - flown + - flung + - forbidden + - foregone + - forgiven + - forgotten + - forsaken + - fought + - found + - frozen + - given + - gone + - gotten + - ground + - grown + - heard + - held + - hidden + - hit + - hung + - hurt + - kept + - knelt + - knit + - known + - laid + - lain + - leapt + - learnt + - led + - left + - lent + - let + - lighted + - lost + - made + - meant + - met + - misspelt + - mistaken + - mown + - overcome + - overdone + - overtaken + - overthrown + - paid + - pled + - proven + - put + - quit + - read + - rid + - ridden + - risen + - run + - rung + - said + - sat + - sawn + - seen + - sent + - set + - sewn + - shaken + - shaven + - shed + - shod + - shone + - shorn + - shot + - shown + - shrunk + - shut + - slain + - slept + - slid + - slit + - slung + - smitten + - sold + - sought + - sown + - sped + - spent + - spilt + - spit + - split + - spoken + - spread + - sprung + - spun + - stolen + - stood + - stridden + - striven + - struck + - strung + - stuck + - stung + - stunk + - sung + - sunk + - swept + - swollen + - sworn + - swum + - swung + - taken + - taught + - thought + - thrived + - thrown + - thrust + - told + - torn + - trodden + - understood + - upheld + - upset + - wed + - wept + - withheld + - withstood + - woken + - won + - worn + - wound + - woven + - written + - wrung \ No newline at end of file diff --git a/vale-styles/FluentBit/Periods.yml b/vale-styles/FluentBit/Periods.yml new file mode 100644 index 000000000..0333c3160 --- /dev/null +++ b/vale-styles/FluentBit/Periods.yml @@ -0,0 +1,7 @@ +extends: existence +message: "Don't use periods with acronyms or initialisms such as '%s'." 
+link: 'https://developers.google.com/style/abbreviations' +level: suggestion +nonword: true +tokens: + - '\b(?:[A-Z]\.){3,}' \ No newline at end of file diff --git a/vale-styles/FluentBit/Possessives.yml b/vale-styles/FluentBit/Possessives.yml new file mode 100644 index 000000000..1d0a74e0c --- /dev/null +++ b/vale-styles/FluentBit/Possessives.yml @@ -0,0 +1,7 @@ +--- +extends: existence +message: "Rewrite '%s' to not use 's." +level: suggestion +ignorecase: true +tokens: + - Bit's diff --git a/vale-styles/FluentBit/Quotes.yml b/vale-styles/FluentBit/Quotes.yml new file mode 100644 index 000000000..f9c927459 --- /dev/null +++ b/vale-styles/FluentBit/Quotes.yml @@ -0,0 +1,7 @@ +extends: existence +message: "Commas and periods go inside quotation marks." +link: 'https://developers.google.com/style/quotation-marks' +level: suggestion +nonword: true +tokens: + - '"[^"]+"[.,?]' \ No newline at end of file diff --git a/vale-styles/FluentBit/Ranges.yml b/vale-styles/FluentBit/Ranges.yml new file mode 100644 index 000000000..78af6f999 --- /dev/null +++ b/vale-styles/FluentBit/Ranges.yml @@ -0,0 +1,7 @@ +extends: existence +message: "Don't add words such as 'from' or 'between' to describe a range of numbers." +link: 'https://developers.google.com/style/hyphens' +nonword: true +level: suggestion +tokens: + - '(?:from|between)\s\d+\s?-\s?\d+' \ No newline at end of file diff --git a/vale-styles/FluentBit/Repetition.yml b/vale-styles/FluentBit/Repetition.yml new file mode 100644 index 000000000..a5158b8b9 --- /dev/null +++ b/vale-styles/FluentBit/Repetition.yml @@ -0,0 +1,7 @@ +--- +extends: repetition +message: '"%s" is repeated.' +level: suggestion +alpha: true +tokens: + - '[^\s]+' diff --git a/vale-styles/FluentBit/SentenceLengthLong.yml b/vale-styles/FluentBit/SentenceLengthLong.yml new file mode 100644 index 000000000..556580b10 --- /dev/null +++ b/vale-styles/FluentBit/SentenceLengthLong.yml @@ -0,0 +1,7 @@ +--- +extends: occurrence +message: "Improve readability by using fewer than 35 words in this sentence." +scope: sentence +level: suggestion +max: 35 +token: \b(\w+)\b diff --git a/vale-styles/FluentBit/Simplicity.yml b/vale-styles/FluentBit/Simplicity.yml new file mode 100644 index 000000000..e9b779763 --- /dev/null +++ b/vale-styles/FluentBit/Simplicity.yml @@ -0,0 +1,12 @@ +--- +extends: existence +message: 'Avoid words like "%s" that imply ease of use, because the user may find this action difficult.' +level: suggestion +ignorecase: true +tokens: + - easy + - easily + - handy + - simple + - simply + - useful diff --git a/vale-styles/FluentBit/Slang.yml b/vale-styles/FluentBit/Slang.yml new file mode 100644 index 000000000..b43eeb299 --- /dev/null +++ b/vale-styles/FluentBit/Slang.yml @@ -0,0 +1,11 @@ +extends: existence +message: "Don't use internet slang abbreviations such as '%s'." +link: 'https://developers.google.com/style/abbreviations' +ignorecase: true +level: suggestion +tokens: + - 'tl;dr' + - ymmv + - rtfm + - imo + - fwiw \ No newline at end of file diff --git a/vale-styles/FluentBit/Spacing.yml b/vale-styles/FluentBit/Spacing.yml new file mode 100644 index 000000000..57c52f046 --- /dev/null +++ b/vale-styles/FluentBit/Spacing.yml @@ -0,0 +1,8 @@ +extends: existence +message: "'%s' should have one space." +link: 'https://developers.google.com/style/sentence-spacing' +level: suggestion +nonword: true +tokens: + - '[a-z][.?!] 
{2,}[A-Z]' + - '[a-z][.?!][A-Z]' \ No newline at end of file diff --git a/vale-styles/FluentBit/Spelling-exceptions.txt b/vale-styles/FluentBit/Spelling-exceptions.txt new file mode 100644 index 000000000..ef3b7bcbe --- /dev/null +++ b/vale-styles/FluentBit/Spelling-exceptions.txt @@ -0,0 +1,198 @@ +accessor +Alertmanager +allowlist +Ansible +API +APIs +Appname +autoscale +autoscaler +autoscaling +backoff +backpressure +BitBake +Blackhole +blocklist +Buildkite +cAdvisor +Calyptia +chronotf +clickstreams +CloudWatch +CMake +cmdlet +Config +Coralogix +coroutine +coroutines +Crowdstrike +CRDs +DaemonSet +Dash0 +Datadog +Datagen +datapoint +datapoints +Datastream +declaratively +deduplicate +Deployer +deprovision +deprovisioned +deprovisioning +deprovisions +Devo +Distroless +DogStatsD +downsample +downsampled +downsamples +downsampling +downscale +downscaling +downscales +dri +Dynatrace +Elasticsearch +endcode +endhint +endtab +endtabs +Exabeam +Fargate +Firehose +FluentBit +Fluentd +github +glibc +Golang +golib +Grafana +Graphite +Greylog +grpc_code +grpc_method +grpc_service +gzip +HashiCorp +hostname +Hostname +Ingester +Keepalive +Istio +jemalloc +keepalive +keyless +Kinesis +kubectl +kubelet +Kubernetes +Kusto +labelset +loadgenerator +Logstash +Lua +Lucene +macOS +Mandiant +matchers +Minishift +minikube +MTTx +multithreading +Musl +namespace +namespaces +netcat +Nginx +OAuth +Okta +Oniguruma +OpenTelemetry +Opsgenie +OTel +PagerDuty +performant +persistable +Postgres +PowerShell +prepopulate +Profiler +Prometheus +PromQL +Protobuf +proxying +Pulumi +Pushgateway +quantile +quantiles +queryable +Queryable +Raspbian +rdkafka +Redpanda +rollup +Rollup +rollups +Rollups +routable +runbook +runbooks +Scalyr +SDKs +SELinux +serverless +ServiceDiscovery +ServiceMonitor +ServiceMonitors +sharding +SignalFx +Signup +Sigstore +sparkline +sparklines +Sparklines +Splunk +Stackdriver +StatsD +stderr +stdout +strftime +subcommand +subcommands +subquery +subrecord +substring +syslog +systemctl +Systemd +Tanzu +Telegraf +templated +temporality +Terraform +Thanos +Timeshift +tolerations +tooltip +tooltips +uber +unaggregated +unary +Unary +unmuted +unsort +UUIDs +Vectra +Vercel +VictoriaMetrics +VictorOps +Vivo +VMs +Wavefront +Worldmap +Yocto +Zipkin +Zsh +Zstandard +zstd diff --git a/vale-styles/FluentBit/Spelling.yml b/vale-styles/FluentBit/Spelling.yml new file mode 100644 index 000000000..46cc6dc74 --- /dev/null +++ b/vale-styles/FluentBit/Spelling.yml @@ -0,0 +1,5 @@ +extends: spelling +message: "Spelling check: '%s'?" +level: suggestion +ignore: + - FluentBit/Spelling-exceptions.txt diff --git a/vale-styles/FluentBit/Subjunctive.yml b/vale-styles/FluentBit/Subjunctive.yml new file mode 100644 index 000000000..f7087389c --- /dev/null +++ b/vale-styles/FluentBit/Subjunctive.yml @@ -0,0 +1,14 @@ +--- +extends: existence +message: "Use the indicative or imperative moods when writing docs." +ignorecase: true +level: suggestion +tokens: + - should + - shouldn't + - should not + - won't + - would + - wouldn't + - could + - couldn't diff --git a/vale-styles/FluentBit/Terms.yml b/vale-styles/FluentBit/Terms.yml new file mode 100644 index 000000000..6ad4cf2e3 --- /dev/null +++ b/vale-styles/FluentBit/Terms.yml @@ -0,0 +1,13 @@ +--- +extends: substitution +message: Use '%s' instead of '%s'. 
+level: suggestion +ignorecase: true +scope: paragraph +action: + name: replace +swap: + datapoints: data points + Terraform Provider: Terraform provider + timeseries: time series + topology: placement diff --git a/vale-styles/FluentBit/Units.yml b/vale-styles/FluentBit/Units.yml new file mode 100644 index 000000000..786c1d8bb --- /dev/null +++ b/vale-styles/FluentBit/Units.yml @@ -0,0 +1,11 @@ +extends: existence +message: "Put a nonbreaking space between the number and the unit in '%s'." +link: 'https://developers.google.com/style/units-of-measure' +nonword: true +level: suggestion +tokens: + - \d+(?:B|kB|MB|GB|TB) + - \d+(?:ns|ms|s|min|h|d) + +exceptions: + - k3s diff --git a/vale-styles/FluentBit/UserFocus.yml b/vale-styles/FluentBit/UserFocus.yml new file mode 100644 index 000000000..340ad71e7 --- /dev/null +++ b/vale-styles/FluentBit/UserFocus.yml @@ -0,0 +1,17 @@ +--- +extends: existence +message: "Rewrite to put the focus on what the user wants to do rather than on how the document is laid out." +ignorecase: true +level: suggestion +tokens: + - The purpose of this document is + - This document (?:describes|explains|shows) + - This page (?:describes|explains|shows) + - This page (?:describes|explains|shows) + - This document (?:describes|explains|shows) + - This section (?:describes|explains|shows) + - This section (?:describes|explains|shows) + - The following page (?:describes|explains|shows) + - The following document (?:describes|explains|shows) + - The following section (?:describes|explains|shows) + - This topic (?:describes|explains|shows) diff --git a/vale-styles/FluentBit/We.yml b/vale-styles/FluentBit/We.yml new file mode 100644 index 000000000..ffe69e65d --- /dev/null +++ b/vale-styles/FluentBit/We.yml @@ -0,0 +1,11 @@ +extends: existence +message: "Try to avoid using first-person plural like '%s'." +link: 'https://developers.google.com/style/pronouns#personal-pronouns' +level: suggestion +ignorecase: true +tokens: + - we + - we'(?:ve|re) + - ours? + - us + - let's \ No newline at end of file diff --git a/vale-styles/FluentBit/WordList.yml b/vale-styles/FluentBit/WordList.yml new file mode 100644 index 000000000..8286be776 --- /dev/null +++ b/vale-styles/FluentBit/WordList.yml @@ -0,0 +1,78 @@ +extends: substitution +message: "Use '%s' instead of '%s'." 
+link: 'https://developers.google.com/style/word-list' +level: suggestion +ignorecase: false +action: + name: replace +swap: + '(?:API Console|dev|developer) key': API key + '(?:cell ?phone|smart ?phone)': phone|mobile phone + '(?:dev|developer|APIs) console': API console + '(?:e-mail|Email|E-mail)': email + '(?:file ?path|path ?name)': path + '(?:kill|terminate|abort)': stop|exit|cancel|end + '(?:OAuth ?2|Oauth)': OAuth 2.0 + '(?:ok|Okay)': OK|okay + '(?:WiFi|wifi)': Wi-Fi + '[\.]+apk': APK + '3\-D': 3D + 'Google (?:I\-O|IO)': Google I/O + 'tap (?:&|and) hold': touch & hold + 'un(?:check|select)': clear + # account name: username + action bar: app bar + admin: administrator + Ajax: AJAX + allows you to: lets you + Android device: Android-powered device + android: Android + API explorer: APIs Explorer + approx\.: approximately + as well as: and + authN: authentication + authZ: authorization + autoupdate: automatically update + cellular data: mobile data + cellular network: mobile network + chapter: documents|pages|sections + check box: checkbox + check: select + click on: click|click in + Container Engine: Kubernetes Engine + content type: media type + curated roles: predefined roles + data are: data is + Developers Console: Google API Console|API Console + ephemeral IP address: ephemeral external IP address + fewer data: less data + file name: filename + firewalls: firewall rules + functionality: capability|feature + Google account: Google Account + Google accounts: Google Accounts + Googling: search with Google + grayed-out: unavailable + HTTPs: HTTPS + in order to: to + # ingest: import|load + k8s: Kubernetes + long press: touch & hold + network IP address: internal IP address + omnibox: address bar + open-source: open source + overview screen: recents screen + regex: regular expression + SHA1: SHA-1|HAS-SHA1 + sign into: sign in to + \w* ?sign-?on: single sign-on + static IP address: static external IP address + stylesheet: style sheet + synch: sync + tablename: table name + tablet: device + touch: tap + url: URL + vs\.: versus + wish: want + World Wide Web: web