From eea8a2806d6c3d19dde2ffadd48e2870c8dc9bc8 Mon Sep 17 00:00:00 2001 From: ArtemTrofimushkin Date: Wed, 23 Jun 2021 02:29:22 +0300 Subject: [PATCH] enhancement(azure_blob_sink): add draft for docs reference & update default value for blob_prefix Signed-off-by: ArtemTrofimushkin --- .../reference/components/sinks/azure_blob.cue | 201 ++++++++++++++++++ docs/reference/services/azure_blob.cue | 10 + docs/reference/urls.cue | 2 + src/sinks/azure_blob.rs | 2 +- 4 files changed, 214 insertions(+), 1 deletion(-) create mode 100644 docs/reference/components/sinks/azure_blob.cue create mode 100644 docs/reference/services/azure_blob.cue diff --git a/docs/reference/components/sinks/azure_blob.cue b/docs/reference/components/sinks/azure_blob.cue new file mode 100644 index 0000000000000..fae69c76df63f --- /dev/null +++ b/docs/reference/components/sinks/azure_blob.cue @@ -0,0 +1,201 @@ +package metadata + +components: sinks: azure_blob: { + title: "Azure Blob Storage" + + classes: { + commonly_used: true + delivery: "at_least_once" + development: "beta" + egress_method: "batch" + service_providers: ["Azure"] + stateful: false + } + + features: { + buffer: enabled: true + healthcheck: enabled: true + send: { + batch: { + enabled: true + common: true + max_bytes: 10485760 + timeout_secs: 300 + } + compression: { + enabled: true + default: "gzip" + algorithms: ["none", "gzip"] + levels: ["none", "fast", "default", "best", 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + } + encoding: { + enabled: true + codec: { + enabled: true + default: null + enum: ["ndjson", "text"] + } + } + request: { + enabled: true + concurrency: 50 + rate_limit_duration_secs: 1 + rate_limit_num: 250 + retry_initial_backoff_secs: 1 + retry_max_duration_secs: 10 + timeout_secs: 30 + headers: false + } + tls: enabled: false + to: { + service: services.azure_blob + + interface: { + socket: { + api: { + title: "Azure Blob Service REST API" + url: urls.azure_blob_endpoints + } + direction: "outgoing" + protocols: ["http"] + 
ssl: "required" + } + } + } + } + } + + support: { + targets: { + "aarch64-unknown-linux-gnu": true + "aarch64-unknown-linux-musl": true + "armv7-unknown-linux-gnueabihf": true + "armv7-unknown-linux-musleabihf": true + "x86_64-apple-darwin": true + "x86_64-pc-windows-msv": true + "x86_64-unknown-linux-gnu": true + "x86_64-unknown-linux-musl": true + } + requirements: [] + warnings: [] + notices: [] + } + + configuration: { + connection_string: { + description: "The Azure Blob Storage Account connection string. Only authentication with an access key is supported." + required: true + warnings: [] + type: string: { + examples: ["DefaultEndpointsProtocol=https;AccountName=mylogstorage;AccountKey=storageaccountkeybase64encoded;EndpointSuffix=core.windows.net"] + syntax: "literal" + } + } + container_name: { + description: "The Azure Blob Storage Account container name." + required: true + warnings: [] + type: string: { + examples: ["my-logs"] + syntax: "literal" + } + } + blob_prefix: { + category: "File Naming" + common: true + description: "A prefix to apply to all object key names. This should be used to partition your objects, and it's important to end this value with a `/` if you want this to be the root Azure storage \"folder\"." + required: false + warnings: [] + type: string: { + default: "blob/%F/" + examples: ["date/%F/", "date/%F/hour/%H/", "year=%Y/month=%m/day=%d/", "kubernetes/{{ metadata.cluster }}/{{ metadata.application_name }}/"] + syntax: "template" + } + } + blob_append_uuid: { + category: "File Naming" + common: false + description: "Whether or not to append a UUID v4 token to the end of the file. This ensures there are no name collisions in high volume use cases." + required: false + warnings: [] + type: bool: default: true + } + blob_time_format: { + category: "File Naming" + common: false + description: "The format of the resulting object file name. [`strftime` specifiers](\(urls.strptime_specifiers)) are supported." 
+ required: false + warnings: [] + type: string: { + default: "%s" + syntax: "strftime" + } + } + } + + input: { + logs: true + metrics: null + } + + how_it_works: { + object_naming: { + title: "Object naming" + body: """ + By default, Vector will name your blobs in the following format: + + + + + + ```text + <blob_prefix><timestamp>-<uuidv4>.log + ``` + + For example: + + ```text + blob/2021-06-23/1560886634-fddd7a0e-fad9-4f7e-9bce-00ae5debc563.log + ``` + + + + + ```text + <blob_prefix><timestamp>-<uuidv4>.log.gz + ``` + + For example: + + ```text + blob/2021-06-23/1560886634-fddd7a0e-fad9-4f7e-9bce-00ae5debc563.log.gz + ``` + + + + + Vector appends a [UUIDV4](\(urls.uuidv4)) token to ensure there are no name + conflicts in the unlikely event 2 Vector instances are writing data at the same + time. + + You can control the resulting name via the `blob_prefix`, `blob_time_format`, + and `blob_append_uuid` options. + """ + } + } + + telemetry: metrics: { + events_discarded_total: components.sources.internal_metrics.output.metrics.events_discarded_total + processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total + http_error_response_total: components.sources.internal_metrics.output.metrics.http_error_response_total + http_request_errors_total: components.sources.internal_metrics.output.metrics.http_request_errors_total + processed_bytes_total: components.sources.internal_metrics.output.metrics.processed_bytes_total + } +} diff --git a/docs/reference/services/azure_blob.cue b/docs/reference/services/azure_blob.cue new file mode 100644 index 0000000000000..ed1df56f98ab8 --- /dev/null +++ b/docs/reference/services/azure_blob.cue @@ -0,0 +1,10 @@ +package metadata + +services: azure_blob: { + name: "Azure Blob Storage" + thing: "an \(name) account" + url: urls.azure_blob + versions: null + + description: "[Azure Blob Storage](\(urls.azure_blob)) is Microsoft's object storage solution for the cloud. Blob storage is optimized for storing massive amounts of unstructured data. 
Unstructured data is data that doesn't adhere to a particular data model or definition, such as text or binary data." +} diff --git a/docs/reference/urls.cue b/docs/reference/urls.cue index e831ab59b120e..0156b9c3de361 100644 --- a/docs/reference/urls.cue +++ b/docs/reference/urls.cue @@ -79,6 +79,8 @@ urls: { aws_sqs_api: "\(aws_docs)/AWSSimpleQueueService/latest/APIReference/Welcome.html" aws_sqs_create: "\(aws_docs)/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-configure-create-queue.html" aws_vpc_flow_logs: "\(aws_docs)/vpc/latest/userguide/flow-logs.html" + azure_blob: "https://azure.microsoft.com/en-us/services/storage/blobs/" + azure_blob_endpoints: "https://docs.microsoft.com/en-us/rest/api/storageservices/blob-service-rest-api" azure_monitor: "https://azure.microsoft.com/en-us/services/monitor/" azure_monitor_logs_endpoints: "https://docs.microsoft.com/en-us/rest/api/monitor/" base64: "\(wikipedia)/wiki/Base64" diff --git a/src/sinks/azure_blob.rs b/src/sinks/azure_blob.rs index b0f7e6a7300ff..512dd252444f7 100644 --- a/src/sinks/azure_blob.rs +++ b/src/sinks/azure_blob.rs @@ -162,7 +162,7 @@ impl AzureBlobSinkConfig { .service(blob); let encoding = self.encoding.clone(); - let blob_prefix = self.blob_prefix.as_deref().unwrap_or("blob"); + let blob_prefix = self.blob_prefix.as_deref().unwrap_or("blob/%F/"); let blob_prefix = Template::try_from(blob_prefix)?; let buffer = PartitionBuffer::new(Buffer::new(batch.size, compression)); let sink = PartitionBatchSink::new(svc, buffer, batch.timeout, cx.acker())