Skip to content

Memory leak and cpu increase when using receiver creator with k8s observer and postgresql receiver #40962

Open
@alexgenon

Description

@alexgenon

Component(s)

receiver/receivercreator

What happened?

Description

We upgraded from v0.119.0 to v0.128.0 recently and we noticed a memory leak caused by the receivercreator when used with the k8sobserver and postgresql.
According to the logs, the postgresql receiver is re-created every 10s, but this was already the case in v0.119.0. However, since v0.128.0, we have also observed a steady increase in heap memory and CPU usage over time.

Steps to Reproduce

  • Deploy a collector with the configuration below inside Kubernetes
  • Deploy postgresql inside the cluster with the annotation defined in the configuration

Expected Result

  • receivercreator creates the receivers only once
  • Memory and CPU usage remain constant

Actual Result

  • CPU usage increases steadily
  • Heap memory usage increases steadily
    See attached graph.

Image

Image

Collector version

v0.128.0

Environment information

Environment

Kubernetes
Collector installed via the Helm chart

OpenTelemetry Collector configuration

# Exporters declared for this collector. Only `otlp` is listed in the metrics
# pipeline under `service` in this configuration.
exporters:
  # OTLP export to the gateway collector over a non-TLS connection.
  otlp:
    endpoint: opentelemetry-collector-gateway:4319
    retry_on_failure:
      max_elapsed_time: 1h
      max_interval: 5m
    tls:
      insecure: true
  # Declared but not referenced by any pipeline in this configuration.
  debug:
    sampling_initial: 5
    sampling_thereafter: 200
    verbosity: detailed
# Extensions: a health-check endpoint and the Kubernetes observer that
# `receiver_creator` lists under `watch_observers`.
extensions:
  health_check:
    # Host part is read from the MY_POD_IP environment variable.
    endpoint: ${env:MY_POD_IP}:13133
  k8s_observer:
    auth_type: serviceAccount
    # Pods are observed; the receiver_creator rule in this file matches on
    # type == "pod".
    observe_pods: true
# Processors. All three are used by the metrics pipeline under `service`.
processors:
  # Default batch settings (empty mapping).
  batch: {}
  k8sattributes:
    extract:
      # Kubernetes metadata fields to extract.
      metadata:
      - k8s.namespace.name
      - k8s.deployment.name
      - k8s.statefulset.name
      - k8s.daemonset.name
      - k8s.cronjob.name
      - k8s.job.name
      - k8s.node.name
      - k8s.pod.name
      - k8s.pod.uid
      - k8s.pod.start_time
    passthrough: false
    # Three association sources: the k8s.pod.ip resource attribute, the
    # k8s.pod.uid resource attribute, and the connection.
    pod_association:
    - sources:
      - from: resource_attribute
        name: k8s.pod.ip
    - sources:
      - from: resource_attribute
        name: k8s.pod.uid
    - sources:
      - from: connection
  # Percentage-based memory limits, checked every 5s.
  memory_limiter:
    check_interval: 5s
    limit_percentage: 80
    spike_limit_percentage: 25

# Receivers. Only `receiver_creator` is listed in the metrics pipeline under
# `service` in this configuration.
receivers:
  # Declared but not listed under service.pipelines.metrics.receivers.
  k8s_cluster:
    allocatable_types_to_report:
    - cpu
    - memory
    - storage
    - ephemeral-storage
    collection_interval: 10s
    metrics:
      k8s.pod.status_reason:
        enabled: true
      k8s.resource_quota.hard_limit:
        enabled: true
      k8s.resource_quota.used:
        enabled: true
    node_conditions_to_report:
    - Ready
    - MemoryPressure
    - DiskPressure
    - PIDPressure
    - NetworkUnavailable
    resource_attributes:
      k8s.container.status.last_terminated_reason:
        enabled: true
      k8s.kubelet.version:
        enabled: true
  # The component this report is about: it starts a postgresql receiver for
  # each endpoint that matches `rule`.
  receiver_creator:
    receivers:
      # Template for the postgresql receiver.
      postgresql:
        config:
          # The backticked `endpoint` placeholder is filled in per discovered
          # endpoint; the "starting receiver" logs in this report show it
          # resolved to a pod IP (e.g. 10.42.1.87:5432).
          endpoint: '`endpoint`:5432'
          # Metrics explicitly switched on for this receiver.
          metrics:
            postgresql.database.locks:
              enabled: true
            postgresql.deadlocks:
              enabled: true
            postgresql.sequential_scans:
              enabled: true
            postgresql.temp_files:
              enabled: true
            postgresql.wal.delay:
              enabled: true
          # NOTE(review): these use the ${VAR} form, while health_check uses
          # ${env:VAR}; presumably both are read from the environment - confirm.
          password: ${PG_PWD}
          username: ${PG_USERNAME}
        # Matches pods whose annotation aegaeon-it.com/o11y_type is "postgresql".
        rule: type == "pod" && annotations['aegaeon-it.com/o11y_type'] == "postgresql"
    # Endpoints come from the k8s_observer extension declared in `extensions`.
    watch_observers:
    - k8s_observer
# Service wiring: enabled extensions, the single metrics pipeline, and the
# collector's own telemetry settings.
service:
  extensions:
  - k8s_observer
  - health_check
  pipelines:
    # The only pipeline: receiver_creator -> memory_limiter, k8sattributes,
    # batch -> otlp.
    metrics:
      exporters:
      - otlp
      processors:
      - memory_limiter
      - k8sattributes
      - batch
      receivers:
      - receiver_creator
  telemetry:
    # JSON-encoded logs, matching the log lines quoted in this report.
    logs:
      encoding: json
    # Detailed internal metrics, served by a Prometheus pull reader on
    # 0.0.0.0:8888.
    metrics:
      level: detailed
      readers:
      - pull:
          exporter:
            prometheus:
              host: 0.0.0.0
              port: 8888

Log output

{"level":"info","ts":"2025-06-27T12:13:42.797Z","caller":"receivercreator@v0.128.0/observerhandler.go:206","msg":"starting receiver","resource":{"service.instance.id":"984c3981-cf89-494d-9522-659d423237fa","service.name":"otelcol-contrib","service.version":"0.128.0"},"otelcol.component.id":"receiver_creator","otelcol.component.kind":"receiver","otelcol.signal":"metrics","name":"postgresql","endpoint":"10.42.1.87","endpoint_id":"k8s_observer/8d8552cd-70c9-449d-9749-f25ded37a41d","config":{"endpoint":"10.42.1.87:5432","metrics":{"postgresql.database.locks":{"enabled":true},"postgresql.deadlocks":{"enabled":true},"postgresql.sequential_scans":{"enabled":true},"postgresql.temp_files":{"enabled":true},"postgresql.wal.delay":{"enabled":true}},"password":"<redacted>","username":"otel"}}
{"level":"info","ts":"2025-06-27T12:13:42.798Z","caller":"receivercreator@v0.128.0/observerhandler.go:206","msg":"starting receiver","resource":{"service.instance.id":"984c3981-cf89-494d-9522-659d423237fa","service.name":"otelcol-contrib","service.version":"0.128.0"},"otelcol.component.id":"receiver_creator","otelcol.component.kind":"receiver","otelcol.signal":"metrics","name":"postgresql","endpoint":"10.42.3.90","endpoint_id":"k8s_observer/809c005d-672d-44bd-84e9-b740fe9d3a63","config":{"endpoint":"10.42.3.90:5432","metrics":{"postgresql.database.locks":{"enabled":true},"postgresql.deadlocks":{"enabled":true},"postgresql.sequential_scans":{"enabled":true},"postgresql.temp_files":{"enabled":true},"postgresql.wal.delay":{"enabled":true}},"password":"<redacted>","username":"otel"}}
{"level":"info","ts":"2025-06-27T12:13:52.794Z","caller":"receivercreator@v0.128.0/observerhandler.go:127","msg":"stopping receiver","resource":{"service.instance.id":"984c3981-cf89-494d-9522-659d423237fa","service.name":"otelcol-contrib","service.version":"0.128.0"},"otelcol.component.id":"receiver_creator","otelcol.component.kind":"receiver","otelcol.signal":"metrics","receiver":{},"endpoint_id":"k8s_observer/8d8552cd-70c9-449d-9749-f25ded37a41d"}
{"level":"info","ts":"2025-06-27T12:13:52.794Z","caller":"receivercreator@v0.128.0/observerhandler.go:127","msg":"stopping receiver","resource":{"service.instance.id":"984c3981-cf89-494d-9522-659d423237fa","service.name":"otelcol-contrib","service.version":"0.128.0"},"otelcol.component.id":"receiver_creator","otelcol.component.kind":"receiver","otelcol.signal":"metrics","receiver":{},"endpoint_id":"k8s_observer/809c005d-672d-44bd-84e9-b740fe9d3a63"}
{"level":"info","ts":"2025-06-27T12:13:52.794Z","caller":"receivercreator@v0.128.0/observerhandler.go:206","msg":"starting receiver","resource":{"service.instance.id":"984c3981-cf89-494d-9522-659d423237fa","service.name":"otelcol-contrib","service.version":"0.128.0"},"otelcol.component.id":"receiver_creator","otelcol.component.kind":"receiver","otelcol.signal":"metrics","name":"postgresql","endpoint":"10.42.1.87","endpoint_id":"k8s_observer/8d8552cd-70c9-449d-9749-f25ded37a41d","config":{"endpoint":"10.42.1.87:5432","metrics":{"postgresql.database.locks":{"enabled":true},"postgresql.deadlocks":{"enabled":true},"postgresql.sequential_scans":{"enabled":true},"postgresql.temp_files":{"enabled":true},"postgresql.wal.delay":{"enabled":true}},"password":"<redacted>","username":"otel"}}
{"level":"info","ts":"2025-06-27T12:13:52.795Z","caller":"receivercreator@v0.128.0/observerhandler.go:206","msg":"starting receiver","resource":{"service.instance.id":"984c3981-cf89-494d-9522-659d423237fa","service.name":"otelcol-contrib","service.version":"0.128.0"},"otelcol.component.id":"receiver_creator","otelcol.component.kind":"receiver","otelcol.signal":"metrics","name":"postgresql","endpoint":"10.42.3.90","endpoint_id":"k8s_observer/809c005d-672d-44bd-84e9-b740fe9d3a63","config":{"endpoint":"10.42.3.90:5432","metrics":{"postgresql.database.locks":{"enabled":true},"postgresql.deadlocks":{"enabled":true},"postgresql.sequential_scans":{"enabled":true},"postgresql.temp_files":{"enabled":true},"postgresql.wal.delay":{"enabled":true}},"password":"<redacted>","username":"otel"}}

Additional context

No response

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions