Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ services:

# Only to test prometheus integration
prometheus:
image: prom/prometheus:v3.0.0-beta.1
image: prom/prometheus:v3.4.2
command:
- --web.enable-remote-write-receiver
- --enable-feature=native-histograms
Expand All @@ -85,13 +85,12 @@ services:
- debug

grafana:
image: grafana/grafana:11.3.1
image: grafana/grafana:12.0.2
ports:
- '9300:3000'
volumes:
- grafana-storage:/var/lib/grafana
- ./docker/grafana/provisioning:/etc/grafana/provisioning
container_name: grafana
restart: unless-stopped
networks:
- primary
Expand All @@ -100,6 +99,8 @@ services:
- GF_INSTALL_PLUGINS=grafana-clickhouse-datasource
- CLICKHOUSE_USER=${CLICKHOUSE_USER:-default}
- CLICKHOUSE_PASSWORD=${CLICKHOUSE_PASSWORD:-changeme}
- GF_AUTH_ANONYMOUS_ENABLED=true
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
profiles:
- debug

Expand Down Expand Up @@ -261,7 +262,7 @@ services:
volumes:
- ./docker/redis/redis-cluster.conf:/usr/local/etc/redis/redis.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6379", "ping"]
test: ['CMD', 'redis-cli', '-p', '6379', 'ping']
interval: 10s
timeout: 5s
retries: 3
Expand All @@ -279,7 +280,7 @@ services:
volumes:
- ./docker/redis/redis-cluster.conf:/usr/local/etc/redis/redis.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6379", "ping"]
test: ['CMD', 'redis-cli', '-p', '6379', 'ping']
interval: 10s
timeout: 5s
retries: 3
Expand All @@ -297,7 +298,7 @@ services:
volumes:
- ./docker/redis/redis-cluster.conf:/usr/local/etc/redis/redis.conf
healthcheck:
test: ["CMD", "redis-cli", "-p", "6379", "ping"]
test: ['CMD', 'redis-cli', '-p', '6379', 'ping']
interval: 10s
timeout: 5s
retries: 3
Expand Down
185 changes: 185 additions & 0 deletions router/bench-limited-cardinality.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
import http from 'k6/http';
import { check } from 'k6';
import { randomString } from 'https://jslib.k6.io/k6-utils/1.2.0/index.js';

/*
Benchmarking script that runs a GraphQL query with a random operation name drawn from a fixed-size pool.
Useful for testing metric attributes.
*/

// Load profile for the benchmark: the target ramps up through 20, 50 and 100,
// then stays at 100 for 30 minutes. Each [duration, target] pair becomes one stage.
export const options = {
  stages: [
    ['15s', 20],
    ['15s', 50],
    ['20s', 100],
    ['30m', 100],
  ].map(([duration, target]) => ({ duration, target })),
};

// In the simple case, starting from a clean state, expect around (operationName count) * 5 series per metric,
// mostly due to the wg_subgraph_id and wg_subgraph_name arrays exploding.

// 300 names should stay under the default cardinality limit (1500 < 2000).
// 500 names should land slightly over the default cardinality limit (2500 > 2000).
// Size of the pool of random operation names generated by setup().
const distinctNames = 300;

/**
 * Builds the pool of random operation names used by the benchmark.
 *
 * Generates `distinctNames` strings of 10 ASCII letters each and logs how many
 * were created.
 *
 * @returns {{ randomNames: string[] }} the name pool, in the shape the default
 *   exported function destructures.
 */
export function setup() {
  const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';
  const randomNames = Array.from({ length: distinctNames }, () => randomString(10, alphabet));

  console.log('Generated ' + distinctNames + ' random names');

  return { randomNames };
}

/**
 * Benchmark iteration: sends the employees query to the local GraphQL endpoint
 * under an operation name picked at random from the pool.
 *
 * The picked name replaces the `$$__REPLACE_ME__$$` placeholder in the query
 * text and is also sent as the `operationName` field of the request body.
 *
 * @param {{ randomNames: string[] }} data - object carrying the pool of operation names.
 */
export default function ({ randomNames }) {
  // Query text with a placeholder where the operation name goes.
  const queryTemplate = `
query $$__REPLACE_ME__$$ {
employees {
# resolved through employees subgraph
id
# overridden by the products subgraph
notes
details {
# resolved through either employees or family subgraph
forename
surname
# resolved through employees subgraph
location {
key {
name
}
}
# resolved through family subgraph
hasChildren
# maritalStatus can return null
maritalStatus
nationality
# pets can return null
pets {
class
gender
name
... on Cat {
type
}
... on Dog {
breed
}
... on Alligator {
dangerous
}
}
}
# resolved through employees subgraph
role {
departments
title
... on Engineer {
engineerType
}
... on Operator {
operatorType
}
}
# resolved through hobbies subgraph
hobbies {
... on Exercise {
category
}
... on Flying {
planeModels
yearsOfExperience
}
... on Gaming {
genres
name
yearsOfExperience
}
... on Other {
name
}
... on Programming {
languages
}
... on Travelling {
countriesLived {
key {
name
}
}
}
}
# resolved through products subgraph
products
}
# can return null
employee(id: 1) {
# resolved through employees subgraph
id
details {
forename
location {
key {
name
}
}
}
}
teammates(team: OPERATIONS) {
# resolved through employees subgraph
id
...EmployeeNameFragment
# resolved through products subgraph
products
}
productTypes {
... on Documentation {
url(product: SDK)
urls(products: [COSMO, MARKETING])
}
... on Consultancy {
lead {
...EmployeeNameFragment
}
name
}
}
a: findEmployees(criteria: {
hasPets: true, nationality: UKRAINIAN, nested: { maritalStatus: ENGAGED }
}) {
...EmployeeNameFragment
}
b: findEmployees(criteria: {
hasPets: true, nationality: GERMAN, nested: { maritalStatus: MARRIED, hasChildren: true }
}) {
...EmployeeNameFragment
}
}

fragment EmployeeNameFragment on Employee {
details {
forename
}
}`;

  // Pick one name uniformly from the pool and stamp it into the query.
  const operationName = randomNames[Math.floor(Math.random() * randomNames.length)];
  const query = queryTemplate.replace(/\$\$__REPLACE_ME__\$\$/g, operationName);

  const requestBody = JSON.stringify({ query, operationName });
  const requestParams = {
    headers: {
      'Content-Type': 'application/json',
      'GraphQL-Client-Name': 'k6',
      'GraphQL-Client-Version': '0.0.1',
    },
  };

  const res = http.post('http://localhost:3002/graphql', requestBody, requestParams);

  // Passes only for HTTP 200 responses whose body does not contain the text "errors".
  check(res, {
    'is status 200': (r) => r.status === 200 && !r.body.includes('errors'),
  });
}
2 changes: 1 addition & 1 deletion router/core/graph_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ func (s *graphServer) buildGraphMux(ctx context.Context,
rmetric.WithBaseAttributes(baseMetricAttributes),
rmetric.WithLogger(s.logger),
rmetric.WithProcessStartTime(s.processStartTime),
rmetric.WithCardinalityLimit(rmetric.DefaultCardinalityLimit),
rmetric.WithCardinalityLimit(s.metricConfig.CardinalityLimit),
rmetric.WithRouterInfoAttributes(routerInfoBaseAttrs),
)
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions router/core/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -2175,6 +2175,7 @@ func MetricConfigFromTelemetry(cfg *config.Telemetry) *rmetric.Config {
Version: Version,
Attributes: cfg.Metrics.Attributes,
ResourceAttributes: buildResourceAttributes(cfg.ResourceAttributes),
CardinalityLimit: cfg.Metrics.CardinalityLimit,
OpenTelemetry: rmetric.OpenTelemetry{
Enabled: cfg.Metrics.OTLP.Enabled,
RouterRuntime: cfg.Metrics.OTLP.RouterRuntime,
Expand Down
7 changes: 4 additions & 3 deletions router/pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,10 @@ type MetricsOTLPExporter struct {
}

type Metrics struct {
Attributes []CustomAttribute `yaml:"attributes"`
OTLP MetricsOTLP `yaml:"otlp"`
Prometheus Prometheus `yaml:"prometheus"`
Attributes []CustomAttribute `yaml:"attributes"`
OTLP MetricsOTLP `yaml:"otlp"`
Prometheus Prometheus `yaml:"prometheus"`
CardinalityLimit int `yaml:"experiment_cardinality_limit" envDefault:"2000" env:"METRICS_EXPERIMENT_CARDINALITY_LIMIT"`
}

type MetricsOTLP struct {
Expand Down
6 changes: 6 additions & 0 deletions router/pkg/config/config.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,12 @@
"description": "The configuration for the collection and export of metrics. The metrics are collected and exported using the OpenTelemetry protocol (OTLP) and Prometheus.",
"additionalProperties": false,
"properties": {
"experiment_cardinality_limit": {
"type": "integer",
"description": "Sets a hard limit on the number of Metric Points that can be collected during a collection cycle. NOTE: This option is experimental and may change in future versions.",
"minimum": 1,
"default": 2000
},
"attributes": {
"type": "array",
"description": "The attributes to add to OTLP Metrics and Prometheus.",
Expand Down
3 changes: 2 additions & 1 deletion router/pkg/config/testdata/config_defaults.json
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@
"Enabled": false,
"IncludeOperationSha": false
}
}
},
"CardinalityLimit": 2000
}
},
"GraphqlMetrics": {
Expand Down
3 changes: 2 additions & 1 deletion router/pkg/config/testdata/config_full.json
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@
"Enabled": true,
"IncludeOperationSha": false
}
}
},
"CardinalityLimit": 2000
}
},
"GraphqlMetrics": {
Expand Down
8 changes: 7 additions & 1 deletion router/pkg/metric/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ import (
// DefaultServerName Default resource name.
const DefaultServerName = "cosmo-router"

// DefaultCardinalityLimit is the hard limit on the number of metric streams that can be collected for a single instrument.
const DefaultCardinalityLimit = 2000

type PrometheusConfig struct {
Enabled bool
ConnectionStats bool
Expand Down Expand Up @@ -115,6 +118,9 @@ type Config struct {

Attributes []config.CustomAttribute

// CardinalityLimit is the hard limit on the number of metric streams that can be collected for a single instrument.
CardinalityLimit int

// IsUsingCloudExporter indicates whether the cloud exporter is used.
// This value is used for tests to enable/disable the simulated cloud exporter.
IsUsingCloudExporter bool
Expand All @@ -126,12 +132,12 @@ func (c *Config) IsEnabled() bool {

// DefaultConfig returns the default config.
func DefaultConfig(serviceVersion string) *Config {

return &Config{
Name: DefaultServerName,
Version: serviceVersion,
ResourceAttributes: make([]attribute.KeyValue, 0),
Attributes: make([]config.CustomAttribute, 0),
CardinalityLimit: DefaultCardinalityLimit,
OpenTelemetry: OpenTelemetry{
Enabled: false,
RouterRuntime: true,
Expand Down
3 changes: 0 additions & 3 deletions router/pkg/metric/metric_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ import (
"go.opentelemetry.io/otel/sdk/metric"
)

// DefaultCardinalityLimit is the hard limit on the number of metric streams that can be collected for a single instrument.
const DefaultCardinalityLimit = 2000

// Server HTTP metrics.
const (
RequestCounter = "router.http.requests" // Incoming request count total
Expand Down
Loading
Loading