From abe7bdd2d3901827659e09f8a2e970dea7275530 Mon Sep 17 00:00:00 2001
From: Pedro Maximino <53193337+PMax5@users.noreply.github.com>
Date: Thu, 24 Aug 2023 09:55:26 +0200
Subject: [PATCH] Use fluentd official helm chart

Separated the fluentd pipeline into three different config maps:
sources, filters and outputs. The SWAN logs are collected from
the source, filtered with custom filters and pushed to the endpoint
defined in the outputs config map.
Furthermore, included a CA bundle file containing the CERN CA
certificates, which is mounted in the fluentd pods so that the
filtered logs can be pushed to a custom endpoint through TLS and
the TLS certificate of the server can be verified.
---
swan-cern/Chart.yaml | 4 +-
swan-cern/files/ca-certs/ca-bundle.crt | 96 ++++++++++
swan-cern/templates/fluentd/fluentd_ca.yaml | 7 +
.../fluentd/fluentd_filters.conf.yaml | 131 ++++++++++++++
.../fluentd/fluentd_outputs.conf.yaml | 24 +++
.../fluentd/fluentd_sources.conf.yaml | 40 +++++
swan-cern/values.yaml | 169 +++---------------
7 files changed, 327 insertions(+), 144 deletions(-)
create mode 100644 swan-cern/files/ca-certs/ca-bundle.crt
create mode 100644 swan-cern/templates/fluentd/fluentd_ca.yaml
create mode 100644 swan-cern/templates/fluentd/fluentd_filters.conf.yaml
create mode 100644 swan-cern/templates/fluentd/fluentd_outputs.conf.yaml
create mode 100644 swan-cern/templates/fluentd/fluentd_sources.conf.yaml
diff --git a/swan-cern/Chart.yaml b/swan-cern/Chart.yaml
index 8a7eb957..38064aa0 100644
--- a/swan-cern/Chart.yaml
+++ b/swan-cern/Chart.yaml
@@ -13,8 +13,8 @@ dependencies:
version: 0.6.6
repository: oci://registry.cern.ch/swan/charts
- name: fluentd
- repository: http://registry.cern.ch/chartrepo/cern
- version: 0.1.5
+ version: 0.4.3
+ repository: https://fluent.github.io/helm-charts
- name: gpu-operator
version: 22.9.1
repository: https://helm.ngc.nvidia.com/nvidia
diff --git a/swan-cern/files/ca-certs/ca-bundle.crt b/swan-cern/files/ca-certs/ca-bundle.crt
new file mode 100644
index 00000000..e8eb8d27
--- /dev/null
+++ b/swan-cern/files/ca-certs/ca-bundle.crt
@@ -0,0 +1,96 @@
+# CERN Root Certification Authority 2
+-----BEGIN CERTIFICATE-----
+MIIGqTCCBJGgAwIBAgIQAojDcLlcbrhBX0qrEka4mzANBgkqhkiG9w0BAQ0FADBK
+MQswCQYDVQQGEwJjaDENMAsGA1UEChMEQ0VSTjEsMCoGA1UEAxMjQ0VSTiBSb290
+IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IDIwHhcNMTMwMzE5MTI1NTM2WhcNMzMw
+MzE5MTMwNTM0WjBKMQswCQYDVQQGEwJjaDENMAsGA1UEChMEQ0VSTjEsMCoGA1UE
+AxMjQ0VSTiBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IDIwggIiMA0GCSqG
+SIb3DQEBAQUAA4ICDwAwggIKAoICAQDxqYPFW2qVVi3Rw1NKlEf7x70xF+6a8uE/
+Tu4ZVQF/K2RXI95QLkYfKItZvy9Az3ib/VlUho5f8fBaqy4n70uwC7+qd3Aq1/xQ
+ysykPCbBBAsOSQQpTlhrMD2V5Ya9zrirphOhutddiqV96zBCyMM+Gz5uYv9u+cm4
+tg1EOmAMGh2UNxfTFNVmXKkk7eFTSC1+zgb28H6nd3xzV27sn9bfOfGh//ZPy5gm
+Qx0Oh/tc6WMreWzRZBQm5SJiK0QOzPv09p5WmdY2WxZoqNTFBDACQO7ysFOktc74
+fPVFX/lmt4jFNSZRIOvvaACI/qlEaAJTR4FHIY9uSMsV8DrtzhI1Ucyv3kqlQpbF
+jDouq44IryA/np4s/124bW+x8+n/v+at/AxPjvHBLiGhB+J38Z6KcJogoDnGzIXR
+S+YUr/vGz34jOmkRuDN5STuuAXzyCKFXaoAm0AwjTziIv3E0jxC1taw6FpKevnd1
+CLsTLAEUiEjzStFkDhd/Hpipc57zmMFY8VYet2wVqSFjnt2REWOVbZlbCiMHmSeD
+u5EuZLiU8xlkiaCfn4A5XZ6X0qprbgDviGJtwxzNvTg7Hn0ziW5/ELryfQXCwZJ+
+FVne8Zu8sbgy/sDkX+pyFuyB4XgiM0eMNkoexIXJaRdlMWDIL5ysiIXQKjhynAv5
+KLHbRjciVwIDAQABo4IBiTCCAYUwCwYDVR0PBAQDAgGGMA8GA1UdEwEB/wQFMAMB
+Af8wHQYDVR0OBBYEFPp7+96bDaPyUrds7VsPC6KmpvgEMBAGCSsGAQQBgjcVAQQD
+AgEAMIIBMgYDVR0gBIIBKTCCASUwggEhBgorBgEEAWAKBAEBMIIBETCBwgYIKwYB
+BQUHAgIwgbUegbIAQwBFAFIATgAgAFIAbwBvAHQAIABDAGUAcgB0AGkAZgBpAGMA
+YQB0AGkAbwBuACAAQQB1AHQAaABvAHIAaQB0AHkAIAAyACAAQwBlAHIAdABpAGYA
+aQBjAGEAdABlACAAUABvAGwAaQBjAHkAIABhAG4AZAAgAEMAZQByAHQAaQBmAGkA
+YwBhAHQAZQAgAFAAcgBhAGMAdABpAGMAZQAgAFMAdABhAHQAZQBtAGUAbgB0MEoG
+CCsGAQUFBwIBFj5odHRwOi8vY2FmaWxlcy5jZXJuLmNoL2NhZmlsZXMvY3AtY3Bz
+L2Nlcm4tcm9vdC1jYTItY3AtY3BzLnBkZjANBgkqhkiG9w0BAQ0FAAOCAgEAo0Px
+l4CZ6C6bDH+b6jV5uUO0NIHtvLuVgQLMdKVHtQ2UaxeIrWwD+Kz1FyJCHTRXrCvE
+OFOca9SEYK2XrbqZGvRKdDRsq+XYts6aCampXj5ahh6r4oQJ8U7aLVfziKTK13Gy
+dYFoAUeUrlNklICt3v2wWBaa1tg2oSlU2g4iCg9kYpRnIW3VKSrVsdVk2lUa4EXs
+nTEJ30OS7rqX3SdqZp8G+awtBEReh2XPhRgJ6w3xiScP/UdWYUam2LflCGX3RibB
+/DZhgGHRRoE4/D0kQMP2XTz6cClbNklECTlp0qZIbiaf350HbcDEFzYRSSIi0emv
+kRGcMgsi8yTTU87q8Cr4hETxAF3ZbSVNC0ZaTZ8RBbM9BXguhYzKkVBgG/cMpUjs
+B6tY2HMZbAZ3TKQRb/bRyUigM9DniKWeXkeL/0Nsno+XbcpAqLjtVIRwCg6jTLUi
+1NRsl3BP6C824dVaoI8Ry7m+o6O+mtocw4BMhHfTcoWCO8CWjT0ME67JzaAYa5eM
++OqoWtgbgweBlfO0/3GMnVGMAmI4FlhH2oWKWQgWdgr0Wgh9K05VcxSpJ87/zjhb
+MQn/bEojWmp6eUppPaqNFcELvud41qoe6hLsOYQVUQ1sHi7n6ouhg4BAbwS2iyD2
+uiA6FHTCeLreFGUzs5osPKiz3GE5D6V9she9xIQ=
+-----END CERTIFICATE-----
+
+# CERN Grid Certification Authority
+-----BEGIN CERTIFICATE-----
+MIIJnDCCB4SgAwIBAgIKYQQltAAAAAAACzANBgkqhkiG9w0BAQ0FADBKMQswCQYD
+VQQGEwJjaDENMAsGA1UEChMEQ0VSTjEsMCoGA1UEAxMjQ0VSTiBSb290IENlcnRp
+ZmljYXRpb24gQXV0aG9yaXR5IDIwHhcNMjIwMzI5MDgyNDIyWhcNMzIwMzI5MDgz
+NDIyWjBWMRIwEAYKCZImiZPyLGQBGRYCY2gxFDASBgoJkiaJk/IsZAEZFgRjZXJu
+MSowKAYDVQQDEyFDRVJOIEdyaWQgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwggIi
+MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDS9Ypy1csm0aZA4/QnWe2oaiQI
+LqfeekV8kSSvOhW2peo5cLNIKbXATOo1l2iwIbCWV8SRU2TLKxHIL8fAOJud5n9K
+mEKBew7nzubl1wG93B4dY0KREdb3/QB/7OkG8ZZvLqrvQZVGT1CgJ+NFFUiJ315D
+FWkKctZv27LjQamzCxpX+gZSsmwZmSReY67cnm6P7z+/3xVNhwb+4Z+1Ww4vHhMc
+dh1Dsrkv9vXU01UN752QtQ6l56uQLYEB2+vaHB6IpyC9zAQ/33GulCq8Gbj7ykPd
+9AcRVBeJAErSK+oMHThtdLD7mhTkZivakaNe4O1EhPFH0rWwV45IFN7ipELA5qDx
+djdzo6JtLJQMaSV/TV+amEf2CaKlD0giqGhjfSNiOX5HCmpqV14kbl+7Qho6ykZy
+b1DGpf70yILnX+AUtdpd8lulTu1yg1Bg5cFQskUIk5+s4nsC1VpmeNxYaeFEcYZj
+Ph2mdD7zLo889MtF7kZv7+6J6p4NBL3fQ9Os8/h8XVlfDatzbpVH4jYKKAd4nwJb
+knJaKPE0LzLzVfJBwnDxqe8hb64gI8Frludp+jaOYzvMqlzAe9z4a9971iXIWaaG
+unbAoEkXj69y7MsvCjWXB7o9HdBaS9FL+ZtXTKCyXl+XLFseYQoQburKr+eTcRed
+KLJNj4tRF1799PO69wIDAQABo4IEdjCCBHIwEAYJKwYBBAGCNxUBBAMCAQEwIwYJ
+KwYBBAGCNxUCBBYEFGPCgXhtlBTXUVYziSFk8YWmsNHgMB0GA1UdDgQWBBSloP1m
+WP253Xrhsp2fo9HlUBiU5zCCAS4GA1UdIASCASUwggEhMIIBHQYKKwYBBAFgCgQB
+ATCCAQ0wgb4GCCsGAQUFBwICMIGxHoGuAEMARQBSAE4AIABHAHIAaQBkACAAQwBl
+AHIAdABpAGYAaQBjAGEAdABpAG8AbgAgAEEAdQB0AGgAbwByAGkAdAB5ACAAQwBl
+AHIAdABpAGYAaQBjAGEAdABlACAAUABvAGwAaQBjAHkAIABhAG4AZAAgAEMAZQBy
+AHQAaQBmAGkAYwBhAHQAZQAgAFAAcgBhAGMAdABpAGMAZQAgAFMAdABhAHQAZQBt
+AGUAbgB0MEoGCCsGAQUFBwIBFj5odHRwOi8vY2FmaWxlcy5jZXJuLmNoL2NhZmls
+ZXMvY3AtY3BzL2Nlcm4tZ3JpZC1jYS1jcC1jcHMucGRmADAZBgkrBgEEAYI3FAIE
+DB4KAFMAdQBiAEMAQTALBgNVHQ8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAfBgNV
+HSMEGDAWgBT6e/vemw2j8lK3bO1bDwuipqb4BDCCAUQGA1UdHwSCATswggE3MIIB
+M6CCAS+gggErhlJodHRwOi8vY2FmaWxlcy5jZXJuLmNoL2NhZmlsZXMvY3JsL0NF
+Uk4lMjBSb290JTIwQ2VydGlmaWNhdGlvbiUyMEF1dGhvcml0eSUyMDIuY3JshoHU
+bGRhcDovLy9DTj1DRVJOJTIwUm9vdCUyMENlcnRpZmljYXRpb24lMjBBdXRob3Jp
+dHklMjAyLENOPUNFUk5QS0lST09UMDIsQ049Q0RQLENOPVB1YmxpYyUyMEtleSUy
+MFNlcnZpY2VzLENOPVNlcnZpY2VzLENOPUNvbmZpZ3VyYXRpb24sREM9Y2VybixE
+Qz1jaD9jZXJ0aWZpY2F0ZVJldm9jYXRpb25MaXN0P2Jhc2U/b2JqZWN0Q2xhc3M9
+Y1JMRGlzdHJpYnV0aW9uUG9pbnQwggFEBggrBgEFBQcBAQSCATYwggEyMGcGCCsG
+AQUFBzAChltodHRwOi8vY2FmaWxlcy5jZXJuLmNoL2NhZmlsZXMvY2VydGlmaWNh
+dGVzL0NFUk4lMjBSb290JTIwQ2VydGlmaWNhdGlvbiUyMEF1dGhvcml0eSUyMDIu
+Y3J0MIHGBggrBgEFBQcwAoaBuWxkYXA6Ly8vQ049Q0VSTiUyMFJvb3QlMjBDZXJ0
+aWZpY2F0aW9uJTIwQXV0aG9yaXR5JTIwMixDTj1BSUEsQ049UHVibGljJTIwS2V5
+JTIwU2VydmljZXMsQ049U2VydmljZXMsQ049Q29uZmlndXJhdGlvbixEQz1jZXJu
+LERDPWNoP2NBQ2VydGlmaWNhdGU/YmFzZT9vYmplY3RDbGFzcz1jZXJ0aWZpY2F0
+aW9uQXV0aG9yaXR5MA0GCSqGSIb3DQEBDQUAA4ICAQAv56iMPo0VUkrHxPYLjfyW
+IL/TmYxxYldO8kCTKXyaRO4ZmwD6JjLaclTgSHz7gOKFL35ZF0Rv4nWk/ZJBl+dU
+1udgBjF/uKK0v0m+7iEIOG0HORCCQCDgayuiLomI5eQp8KTgHrswHWL+ESxa3Hdv
+vr7GBG/7EhrYwstm/tOJ8cKaeiooSxHw5Lgsqq229SxfO8fSyS8DAa5eUdWT/dVU
+RDR8lGQShx4R9JOHSDg0y6rE7V0cw/BO3NQuaxMunFXkQprtWneJfR4uugMOKk/v
+tMhQGCDB7o3CVhLGSb+76Tny+eSa2g+Zv17PGVfhnF9oynkCII+shX9TmOUsDEnS
+7MWES58YwnpBZrxdeJVPEzVVuYEZP4QsLrIL1ynFqBwFAnPU48Hs6s+kOI/9BFJz
+v+Fp/iw8BZSOclpJzA5rkW6yQ7LVfjFBV1CgyhO8GH5jhYBd5ZLvG8eLNm8Gpt+H
+n30awoaDoMuHcGS5B6NOZLfwE+suTxMw8pjHhKXx7RkSoeZy72PinlbWn1tWLiPa
+UMdkrb/WHOdMKaadQTDO/VyibBL49iJ8BAlERgIl9QaRDLjAIdD45rLdBe95HxSl
+zpZqsxuI09eJ8+iLFJhTDH2BODoEuqbn6PB/5z2d5zuG5sr85Vzn81ddapuUT9Ra
+/dB5eJQeFZ0WjtUOO3gS/A==
+-----END CERTIFICATE-----
+
diff --git a/swan-cern/templates/fluentd/fluentd_ca.yaml b/swan-cern/templates/fluentd/fluentd_ca.yaml
new file mode 100644
index 00000000..9d1efb5e
--- /dev/null
+++ b/swan-cern/templates/fluentd/fluentd_ca.yaml
@@ -0,0 +1,7 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: fluentd-ca
+ namespace: {{ .Release.Namespace }}
+data:
+{{ (.Files.Glob "files/ca-certs/ca-bundle.crt").AsConfig | indent 2 }}
\ No newline at end of file
diff --git a/swan-cern/templates/fluentd/fluentd_filters.conf.yaml b/swan-cern/templates/fluentd/fluentd_filters.conf.yaml
new file mode 100644
index 00000000..980687fc
--- /dev/null
+++ b/swan-cern/templates/fluentd/fluentd_filters.conf.yaml
@@ -0,0 +1,131 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: fluentd-filters-conf-{{ include "fluentd.shortReleaseName" . }}
+ namespace: {{ .Release.Namespace }} # Helm built-in; field names are case-sensitive
+data:
+ filters.conf: |-
+ # Initial tagging of logs:
+ # - user: user session pods
+ # - hub: hub pod
+ # - logs: other pods
+
+ @type rewrite_tag_filter
+ capitalize_regex_backreference true
+
+ key $.kubernetes.labels.component
+ pattern /^singleuser-server$/
+ tag user
+
+
+ key $.kubernetes.labels.component
+ pattern /^hub$/
+ tag hub
+
+
+ key $.kubernetes.pod_name
+ pattern /.?/
+ tag logs
+
+
+
+ # Retag user and hub logs to:
+ # - metrics: jupyter and jupyterhub custom log messages, used to emit metrics
+ # - logs: rest of log messages
+
+ @type rewrite_tag_filter
+ capitalize_regex_backreference true
+
+ key $.log
+ pattern /^.*?user: .*?, host: .*?, metric: .*?, value: .*$/
+ tag metrics
+
+
+ key $.log
+ pattern /.?/
+ tag logs
+
+
+
+ # Extract relevant information from metric logs:
+ # - user
+ # - host
+ # - metric key: name of the metric
+ # - metric context (optional): additional qualifiers for the metric
+ # - metric value
+
+ @type parser
+ key_name log
+
+ reserve_data true
+ emit_invalid_record_to_error true
+
+
+ @type grok
+
+ # keep unmatched logs
+ grok_failure_key grokfailure
+
+
+ # messages printed with "metric: metric_key.metric_context", where metric_context can have dots
+ pattern user: %{DATA:_metric_user_}, host: %{DATA}, metric: (?<_metric_key_>(\w*))(\.)(?<_metric_context_>(.*)), value: %{GREEDYDATA:_metric_value_}
+
+
+ # messages printed with "metric: metric_key"
+ pattern user: %{DATA:_metric_user_}, host: %{DATA}, metric: (?<_metric_key_>(.*))(?<_metric_context_>(.*)), value: %{GREEDYDATA:_metric_value_}
+
+
+
+
+ # Store extracted metric information in a new "metrics" field, as part of the JSON
+ # of the log message. Use metric key as first attribute under "metrics".
+ # Format: metrics.metric_key.user|context|value
+
+ @type record_transformer
+ enable_ruby
+
+ metrics.${record['_metric_key_']}.user ${record['_metric_user_']}
+ metrics.${record['_metric_key_']}.context ${record['_metric_context_']}
+ metrics.${record['_metric_key_']}.value ${record['_metric_value_']}
+
+ # the below field is needed to run queries that group values by the last segment of the metric context (segments split by '.')
+ # which looks like "metrics.{record['_metric_key_']}_lastelementofmetric_context = {record['metric_value']}"
+ # for example, for a record with below fields
+ # "data.metrics.spawn.context = LCG_xxx.some_spark_cluster.exception_class"
+ # "data.metrics.spawn.value = None"
+ # it emits a field "data.metrics.spawn_exception_class = None
+ metrics.${record['_metric_key_']}_${record['_metric_context_'].gsub('-','_').split('.')[-1]} ${record['_metric_value_']}
+
+ remove_keys ["_metric_user_", "_metric_key_", "_metric_context_", "_metric_value_"]
+
+
+ # Emit general log records with type "logs"
+ # Accessible in OpenSearch with pattern monit_private_swan_logs_logs
+
+ @type record_transformer
+
+ type "logs"
+
+
+
+ # Emit metric log records with type "metrics"
+ # Accessible in OpenSearch with pattern monit_private_swan_logs_metrics
+
+ @type record_transformer
+
+ type "metrics"
+
+
+
+ # Set MONIT producer for all logs
+ # Keep raw log data in "raw" field
+
+ @type record_transformer
+ enable_ruby
+
+ timestamp ${(time.to_f * 1000).to_i}
+ producer {{ .Values.fluentd.output.producer }}
+ raw ${record["log"]} # field named raw is indexed by MONIT/OpenSearch
+
+ remove_keys ["log"]
+
diff --git a/swan-cern/templates/fluentd/fluentd_outputs.conf.yaml b/swan-cern/templates/fluentd/fluentd_outputs.conf.yaml
new file mode 100644
index 00000000..47e4d345
--- /dev/null
+++ b/swan-cern/templates/fluentd/fluentd_outputs.conf.yaml
@@ -0,0 +1,24 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: fluentd-outputs-conf-{{ include "fluentd.shortReleaseName" . }}
+ namespace: {{ .Release.Namespace }} # Helm built-in; field names are case-sensitive
+data:
+ outputs.conf: |-
+ # Push logs to MONIT endpoint
+
+ @type http
+ endpoint_url {{ .Values.fluentd.output.endpoint }}
+ serializer json
+ http_method post
+ cacert_file {{ .Values.fluentd.output.cacert }}
+ authentication basic
+ username {{ .Values.fluentd.output.username }}
+ password {{ .Values.fluentd.output.password }}
+
+ # Forward errors in this pipeline to fluentd pod stdout for debugging
+
\ No newline at end of file
diff --git a/swan-cern/templates/fluentd/fluentd_sources.conf.yaml b/swan-cern/templates/fluentd/fluentd_sources.conf.yaml
new file mode 100644
index 00000000..d508a8a1
--- /dev/null
+++ b/swan-cern/templates/fluentd/fluentd_sources.conf.yaml
@@ -0,0 +1,40 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: fluentd-sources-conf-{{ include "fluentd.shortReleaseName" . }}
+ namespace: {{ .Release.Namespace }} # Helm built-in; field names are case-sensitive
+data:
+ sources.conf: |-
+ # Do not collect fluentd logs to avoid infinite loops.
+
+ @type null
+
+
+ {{ if not .Values.fluentd.output.includeInternal }}
+
+ @type null
+
+ {{ end }}
+
+ # Read from container logs
+
+
+ # Add some basic kubernetes metadata (e.g. pod name) to the logs
+
+ @type kubernetes_metadata
+ @id filter_kube_metadata
+
diff --git a/swan-cern/values.yaml b/swan-cern/values.yaml
index 2c084b6d..9aee1a09 100644
--- a/swan-cern/values.yaml
+++ b/swan-cern/values.yaml
@@ -258,154 +258,39 @@ hadoopTokenGenerator:
image: gitlab-registry.cern.ch/swan/docker-images/hadoop-token-generator:v2.1.2
fluentd:
- containerRuntime: containerd
+ caCertPath: &fluentdCaCertPath /etc/ssl/ca-bundle.crt
plugins:
- fluent-plugin-rewrite-tag-filter
- fluent-plugin-out-http
- fluent-plugin-grok-parser
+ - fluent-plugin-route
+ containerRuntime: containerd
output:
- endpoint: http://monit-logs.cern.ch:10012/
includeInternal: false
- parsingConfig: |
- # Initial tagging of logs:
- # - user: user session pods
- # - hub: hub pod
- # - logs: other pods
-
- @type rewrite_tag_filter
- capitalize_regex_backreference true
-
- key $.kubernetes.labels.component
- pattern /^singleuser-server$/
- tag user
-
-
- key $.kubernetes.labels.component
- pattern /^hub$/
- tag hub
-
-
- key $.kubernetes.pod_name
- pattern /.?/
- tag logs
-
-
-
- # Retag user and hub logs to:
- # - metrics: jupyter and jupyterhub custom log messages, used to emit metrics
- # - logs: rest of log messages
-
- @type rewrite_tag_filter
- capitalize_regex_backreference true
-
- key $.log
- pattern /^.*?user: .*?, host: .*?, metric: .*?, value: .*$/
- tag metrics
-
-
- key $.log
- pattern /.?/
- tag logs
-
-
-
- # Extract relevant information from metric logs:
- # - user
- # - host
- # - metric key: name of the metric
- # - metric context (optional): additional qualifiers for the metric
- # - metric value
-
- @type parser
- key_name log
-
- reserve_data true
- emit_invalid_record_to_error true
-
-
- @type grok
-
- # keep unmatched logs
- grok_failure_key grokfailure
-
-
- # messages printed with "metric: metric_key.metric_context", where metric_context can have dots
- pattern user: %{DATA:_metric_user_}, host: %{DATA}, metric: (?<_metric_key_>(\w*))(\.)(?<_metric_context_>(.*)), value: %{GREEDYDATA:_metric_value_}
-
-
- # messages printed with "metric: metric_key"
- pattern user: %{DATA:_metric_user_}, host: %{DATA}, metric: (?<_metric_key_>(.*))(?<_metric_context_>(.*)), value: %{GREEDYDATA:_metric_value_}
-
-
-
-
- # Store extracted metric information in a new "metrics" field, as part of the JSON
- # of the log message. Use metric key as first attribute under "metrics".
- # Format: metrics.metric_key.user|context|value
-
- @type record_transformer
- enable_ruby
-
- metrics.${record['_metric_key_']}.user ${record['_metric_user_']}
- metrics.${record['_metric_key_']}.context ${record['_metric_context_']}
- metrics.${record['_metric_key_']}.value ${record['_metric_value_']}
-
- # the below field is needed to run queries that group values by the last segment of the metric context (segments split by '.')
- # which looks like "metrics.{record['_metric_key_']}_lastelementofmetric_context = {record['metric_value']}"
- # for example, for a record with below fields
- # "data.metrics.spawn.context = LCG_xxx.some_spark_cluster.exception_class"
- # "data.metrics.spawn.value = None"
- # it emits a field "data.metrics.spawn_exception_class = None
- metrics.${record['_metric_key_']}_${record['_metric_context_'].gsub('-','_').split('.')[-1]} ${record['_metric_value_']}
-
- remove_keys ["_metric_user_", "_metric_key_", "_metric_context_", "_metric_value_"]
-
-
- # Emit general log records with type "logs"
- # Accessible in OpenSearch with pattern monit_private_swan_logs_logs
-
- @type record_transformer
-
- type "logs"
-
-
-
- # Emit metric log records with type "metrics"
- # Accessible in OpenSearch with pattern monit_private_swan_logs_metrics
-
- @type record_transformer
-
- type "metrics"
-
-
-
- # Set MONIT producer for all logs
- # Keep raw log data in "raw" field
-
- @type record_transformer
- enable_ruby
-
- timestamp ${(time.to_f * 1000).to_i}
- producer "#{ENV['OUTPUT_PRODUCER']}"
- raw ${record["log"]} # field named raw is indexed by MONIT/OpenSearch
-
- remove_keys ["log"]
-
-
- # Push logs to MONIT endpoint
-
- @type http
- endpoint_url "#{ENV['OUTPUT_ENDPOINT']}"
- serializer json
- http_method post
-
-
- # Forward errors in this pipeline to fluentd pod stdout for debugging
-
+ cacert: *fluentdCaCertPath
+ configMapConfigs:
+ - fluentd-prometheus-conf # Preserve prometheus config for probes to work
+ - fluentd-sources-conf
+ - fluentd-filters-conf
+ - fluentd-outputs-conf
+ fileConfigs:
+ # This is to disable the configuration that comes from upstream
+ 01_sources.conf: ""
+ 02_filters.conf: ""
+ 03_dispatch.conf: ""
+ 04_outputs.conf: ""
+ volumeMounts:
+ - name: ca-certificate
+ mountPath: *fluentdCaCertPath
+ subPath: ca-bundle.crt
+ readOnly: true
+ volumes:
+ - name: ca-certificate
+ configMap:
+ name: fluentd-ca
+ items:
+ - key: ca-bundle.crt
+ path: ca-bundle.crt
gpu-operator:
enabled: true