---
# Notify the Data Engineering team about services on these hosts
contactgroups: 'admins,analytics'
profile::admin::groups:
  - analytics-admins
  - analytics-privatedata-users
profile::admin::managelingering: true
cluster: analytics
profile::hadoop::common::hadoop_cluster_name: 'analytics-test-hadoop'
profile::hive::client::hive_service_name: 'analytics-test-hive'
profile::analytics::cluster::hdfs_mount::monitoring_user: 'analytics'
# Kerberos config
profile::kerberos::keytabs::keytabs_metadata:
  - role: 'analytics'
    owner: 'analytics'
    group: 'analytics'
    filename: 'analytics.keytab'
  - role: 'analytics-search'
    owner: 'analytics-search'
    group: 'analytics-search'
    filename: 'analytics-search.keytab'
profile::kerberos::client::show_krb_ticket_info: true
# Context https://phabricator.wikimedia.org/T278353#6976509
profile::kerberos::client::dns_canonicalize_hostname: false
profile::kerberos::client::enable_autorenew: true
profile::debdeploy::client::exclude_mounts:
  - /mnt/hdfs
profile::java::java_packages:
  - version: '8'
    variant: 'jdk'
profile::java::extra_args:
  JAVA_TOOL_OPTIONS: "-Dfile.encoding=UTF-8"
profile::monitoring::notifications_enabled: false
profile::presto::cluster_name: analytics-test-presto
profile::presto::discovery_uri: https://analytics-test-presto.eqiad.wmnet:8281
# Temporarily override airflow version to permit phased deployment
profile::airflow::airflow_version: '2.7.3-py3.10-20231127'
# Set up airflow instances.
profile::airflow::instances:
  # airflow@analytics_test instance.
  analytics_test:
    # Since we set security: kerberos a keytab must be deployed for the service_user.
    service_user: analytics
    service_group: analytics
    monitoring_enabled: false
    statsd_monitoring_enabled: true
    airflow_config:
      datahub:
        # Canonical lowercase boolean (was 'False'; parses to the same value)
        enabled: false
        conn_id: datahub_kafka_test
        cluster: test
      metrics:
        # Canonical lowercase boolean (was 'True'; parses to the same value)
        statsd_on: true
        # statsd-exporter is running on the same machine
        statsd_host: localhost
        statsd_port: 9125
        statsd_prefix: airflow
        # StatsD (https://github.com/etsy/statsd) integration settings.
        # If you want to avoid emitting all the available metrics, you can configure an
        # allow list of prefixes (comma separated) to send only the metrics that start
        # with the elements of the list (e.g: "scheduler,executor,dagrun")
        metrics_allow_list: operator_failures_,operator_successes_,sla_missed,executor.queued_tasks,dag.,dagrun.duration.,scheduler.scheduler_loop_duration,dag_processing.import_errors,dag_processing.total_parse_time,ti.failures,ti.successes,ti.finish
        # If you want to utilise your own custom StatsD client set the relevant
        # module path below.
        # Note: The module path must exist on your PYTHONPATH for Airflow to pick it up
        statsd_custom_client_path: wmf_airflow_common.metrics.custom_statsd_client.CustomStatsClient
    connections:
      analytics-test-hive:
        conn_type: hive_metastore
        host: analytics-test-hive.eqiad.wmnet
        port: 9083
        # Rename authMechanism to auth_mechanism
        extra_dejson: {'auth_mechanism': 'GSSAPI'}
      datahub_kafka_test:
        conn_type: datahub_kafka
        # Quoted: plain scalars containing ':' are fragile; same string value
        host: 'kafka-test1006.eqiad.wmnet:9092'
        extra_dejson: {"connection": {"schema_registry_url": "http://karapace1002.eqiad.wmnet:8081"}}
profile::airflow::database_host_default: an-db1001.eqiad.wmnet
profile::contacts::role_contacts: ['Data Engineering']
profile::analytics::conda_analytics::remove_conda_env_pkgs_dir: false
# We need to prevent the removal of the python2 packages because of hive and hive-hcatalog
profile::base::remove_python2_on_bullseye: false
profile::puppet::agent::force_puppet7: true
# NOTE(review): presumably the acme-chief server these hosts fetch TLS certs
# from — verify against the acmechief profile before relying on this.
acmechief_host: acmechief2002.codfw.wmnet
# Store historical data about spark jobs in HDFS
profile::hadoop::spark3::event_log_dir: hdfs:///var/log/spark