Skip to content

Commit

Permalink
Use fluentd official helm chart
Browse files Browse the repository at this point in the history
Split the fluentd pipeline into three separate config maps:
sources, filters and outputs. The SWAN logs are collected by the
sources, processed by custom filters and pushed to an endpoint
defined in the outputs config map.

Furthermore, added a CA bundle file containing the CERN CA
certificates. It is mounted in the fluentd pods so that the filtered
logs can be pushed to a custom endpoint over TLS and the TLS
certificate of the server can be verified.
  • Loading branch information
PMax5 authored and etejedor committed Oct 3, 2023
1 parent 65c8380 commit abe7bdd
Show file tree
Hide file tree
Showing 7 changed files with 327 additions and 144 deletions.
4 changes: 2 additions & 2 deletions swan-cern/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ dependencies:
version: 0.6.6
repository: oci://registry.cern.ch/swan/charts
- name: fluentd
-   repository: http://registry.cern.ch/chartrepo/cern
-   version: 0.1.5
+   version: 0.4.3
+   repository: https://fluent.github.io/helm-charts
- name: gpu-operator
version: 22.9.1
repository: https://helm.ngc.nvidia.com/nvidia
96 changes: 96 additions & 0 deletions swan-cern/files/ca-certs/ca-bundle.crt
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# CERN Root Certification Authority 2
-----BEGIN CERTIFICATE-----
MIIGqTCCBJGgAwIBAgIQAojDcLlcbrhBX0qrEka4mzANBgkqhkiG9w0BAQ0FADBK
MQswCQYDVQQGEwJjaDENMAsGA1UEChMEQ0VSTjEsMCoGA1UEAxMjQ0VSTiBSb290
IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IDIwHhcNMTMwMzE5MTI1NTM2WhcNMzMw
MzE5MTMwNTM0WjBKMQswCQYDVQQGEwJjaDENMAsGA1UEChMEQ0VSTjEsMCoGA1UE
AxMjQ0VSTiBSb290IENlcnRpZmljYXRpb24gQXV0aG9yaXR5IDIwggIiMA0GCSqG
SIb3DQEBAQUAA4ICDwAwggIKAoICAQDxqYPFW2qVVi3Rw1NKlEf7x70xF+6a8uE/
Tu4ZVQF/K2RXI95QLkYfKItZvy9Az3ib/VlUho5f8fBaqy4n70uwC7+qd3Aq1/xQ
ysykPCbBBAsOSQQpTlhrMD2V5Ya9zrirphOhutddiqV96zBCyMM+Gz5uYv9u+cm4
tg1EOmAMGh2UNxfTFNVmXKkk7eFTSC1+zgb28H6nd3xzV27sn9bfOfGh//ZPy5gm
Qx0Oh/tc6WMreWzRZBQm5SJiK0QOzPv09p5WmdY2WxZoqNTFBDACQO7ysFOktc74
fPVFX/lmt4jFNSZRIOvvaACI/qlEaAJTR4FHIY9uSMsV8DrtzhI1Ucyv3kqlQpbF
jDouq44IryA/np4s/124bW+x8+n/v+at/AxPjvHBLiGhB+J38Z6KcJogoDnGzIXR
S+YUr/vGz34jOmkRuDN5STuuAXzyCKFXaoAm0AwjTziIv3E0jxC1taw6FpKevnd1
CLsTLAEUiEjzStFkDhd/Hpipc57zmMFY8VYet2wVqSFjnt2REWOVbZlbCiMHmSeD
u5EuZLiU8xlkiaCfn4A5XZ6X0qprbgDviGJtwxzNvTg7Hn0ziW5/ELryfQXCwZJ+
FVne8Zu8sbgy/sDkX+pyFuyB4XgiM0eMNkoexIXJaRdlMWDIL5ysiIXQKjhynAv5
KLHbRjciVwIDAQABo4IBiTCCAYUwCwYDVR0PBAQDAgGGMA8GA1UdEwEB/wQFMAMB
Af8wHQYDVR0OBBYEFPp7+96bDaPyUrds7VsPC6KmpvgEMBAGCSsGAQQBgjcVAQQD
AgEAMIIBMgYDVR0gBIIBKTCCASUwggEhBgorBgEEAWAKBAEBMIIBETCBwgYIKwYB
BQUHAgIwgbUegbIAQwBFAFIATgAgAFIAbwBvAHQAIABDAGUAcgB0AGkAZgBpAGMA
YQB0AGkAbwBuACAAQQB1AHQAaABvAHIAaQB0AHkAIAAyACAAQwBlAHIAdABpAGYA
aQBjAGEAdABlACAAUABvAGwAaQBjAHkAIABhAG4AZAAgAEMAZQByAHQAaQBmAGkA
YwBhAHQAZQAgAFAAcgBhAGMAdABpAGMAZQAgAFMAdABhAHQAZQBtAGUAbgB0MEoG
CCsGAQUFBwIBFj5odHRwOi8vY2FmaWxlcy5jZXJuLmNoL2NhZmlsZXMvY3AtY3Bz
L2Nlcm4tcm9vdC1jYTItY3AtY3BzLnBkZjANBgkqhkiG9w0BAQ0FAAOCAgEAo0Px
l4CZ6C6bDH+b6jV5uUO0NIHtvLuVgQLMdKVHtQ2UaxeIrWwD+Kz1FyJCHTRXrCvE
OFOca9SEYK2XrbqZGvRKdDRsq+XYts6aCampXj5ahh6r4oQJ8U7aLVfziKTK13Gy
dYFoAUeUrlNklICt3v2wWBaa1tg2oSlU2g4iCg9kYpRnIW3VKSrVsdVk2lUa4EXs
nTEJ30OS7rqX3SdqZp8G+awtBEReh2XPhRgJ6w3xiScP/UdWYUam2LflCGX3RibB
/DZhgGHRRoE4/D0kQMP2XTz6cClbNklECTlp0qZIbiaf350HbcDEFzYRSSIi0emv
kRGcMgsi8yTTU87q8Cr4hETxAF3ZbSVNC0ZaTZ8RBbM9BXguhYzKkVBgG/cMpUjs
B6tY2HMZbAZ3TKQRb/bRyUigM9DniKWeXkeL/0Nsno+XbcpAqLjtVIRwCg6jTLUi
1NRsl3BP6C824dVaoI8Ry7m+o6O+mtocw4BMhHfTcoWCO8CWjT0ME67JzaAYa5eM
+OqoWtgbgweBlfO0/3GMnVGMAmI4FlhH2oWKWQgWdgr0Wgh9K05VcxSpJ87/zjhb
MQn/bEojWmp6eUppPaqNFcELvud41qoe6hLsOYQVUQ1sHi7n6ouhg4BAbwS2iyD2
uiA6FHTCeLreFGUzs5osPKiz3GE5D6V9she9xIQ=
-----END CERTIFICATE-----

# CERN Grid Certification Authority
-----BEGIN CERTIFICATE-----
MIIJnDCCB4SgAwIBAgIKYQQltAAAAAAACzANBgkqhkiG9w0BAQ0FADBKMQswCQYD
VQQGEwJjaDENMAsGA1UEChMEQ0VSTjEsMCoGA1UEAxMjQ0VSTiBSb290IENlcnRp
ZmljYXRpb24gQXV0aG9yaXR5IDIwHhcNMjIwMzI5MDgyNDIyWhcNMzIwMzI5MDgz
NDIyWjBWMRIwEAYKCZImiZPyLGQBGRYCY2gxFDASBgoJkiaJk/IsZAEZFgRjZXJu
MSowKAYDVQQDEyFDRVJOIEdyaWQgQ2VydGlmaWNhdGlvbiBBdXRob3JpdHkwggIi
MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDS9Ypy1csm0aZA4/QnWe2oaiQI
LqfeekV8kSSvOhW2peo5cLNIKbXATOo1l2iwIbCWV8SRU2TLKxHIL8fAOJud5n9K
mEKBew7nzubl1wG93B4dY0KREdb3/QB/7OkG8ZZvLqrvQZVGT1CgJ+NFFUiJ315D
FWkKctZv27LjQamzCxpX+gZSsmwZmSReY67cnm6P7z+/3xVNhwb+4Z+1Ww4vHhMc
dh1Dsrkv9vXU01UN752QtQ6l56uQLYEB2+vaHB6IpyC9zAQ/33GulCq8Gbj7ykPd
9AcRVBeJAErSK+oMHThtdLD7mhTkZivakaNe4O1EhPFH0rWwV45IFN7ipELA5qDx
djdzo6JtLJQMaSV/TV+amEf2CaKlD0giqGhjfSNiOX5HCmpqV14kbl+7Qho6ykZy
b1DGpf70yILnX+AUtdpd8lulTu1yg1Bg5cFQskUIk5+s4nsC1VpmeNxYaeFEcYZj
Ph2mdD7zLo889MtF7kZv7+6J6p4NBL3fQ9Os8/h8XVlfDatzbpVH4jYKKAd4nwJb
knJaKPE0LzLzVfJBwnDxqe8hb64gI8Frludp+jaOYzvMqlzAe9z4a9971iXIWaaG
unbAoEkXj69y7MsvCjWXB7o9HdBaS9FL+ZtXTKCyXl+XLFseYQoQburKr+eTcRed
KLJNj4tRF1799PO69wIDAQABo4IEdjCCBHIwEAYJKwYBBAGCNxUBBAMCAQEwIwYJ
KwYBBAGCNxUCBBYEFGPCgXhtlBTXUVYziSFk8YWmsNHgMB0GA1UdDgQWBBSloP1m
WP253Xrhsp2fo9HlUBiU5zCCAS4GA1UdIASCASUwggEhMIIBHQYKKwYBBAFgCgQB
ATCCAQ0wgb4GCCsGAQUFBwICMIGxHoGuAEMARQBSAE4AIABHAHIAaQBkACAAQwBl
AHIAdABpAGYAaQBjAGEAdABpAG8AbgAgAEEAdQB0AGgAbwByAGkAdAB5ACAAQwBl
AHIAdABpAGYAaQBjAGEAdABlACAAUABvAGwAaQBjAHkAIABhAG4AZAAgAEMAZQBy
AHQAaQBmAGkAYwBhAHQAZQAgAFAAcgBhAGMAdABpAGMAZQAgAFMAdABhAHQAZQBt
AGUAbgB0MEoGCCsGAQUFBwIBFj5odHRwOi8vY2FmaWxlcy5jZXJuLmNoL2NhZmls
ZXMvY3AtY3BzL2Nlcm4tZ3JpZC1jYS1jcC1jcHMucGRmADAZBgkrBgEEAYI3FAIE
DB4KAFMAdQBiAEMAQTALBgNVHQ8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAfBgNV
HSMEGDAWgBT6e/vemw2j8lK3bO1bDwuipqb4BDCCAUQGA1UdHwSCATswggE3MIIB
M6CCAS+gggErhlJodHRwOi8vY2FmaWxlcy5jZXJuLmNoL2NhZmlsZXMvY3JsL0NF
Uk4lMjBSb290JTIwQ2VydGlmaWNhdGlvbiUyMEF1dGhvcml0eSUyMDIuY3JshoHU
bGRhcDovLy9DTj1DRVJOJTIwUm9vdCUyMENlcnRpZmljYXRpb24lMjBBdXRob3Jp
dHklMjAyLENOPUNFUk5QS0lST09UMDIsQ049Q0RQLENOPVB1YmxpYyUyMEtleSUy
MFNlcnZpY2VzLENOPVNlcnZpY2VzLENOPUNvbmZpZ3VyYXRpb24sREM9Y2VybixE
Qz1jaD9jZXJ0aWZpY2F0ZVJldm9jYXRpb25MaXN0P2Jhc2U/b2JqZWN0Q2xhc3M9
Y1JMRGlzdHJpYnV0aW9uUG9pbnQwggFEBggrBgEFBQcBAQSCATYwggEyMGcGCCsG
AQUFBzAChltodHRwOi8vY2FmaWxlcy5jZXJuLmNoL2NhZmlsZXMvY2VydGlmaWNh
dGVzL0NFUk4lMjBSb290JTIwQ2VydGlmaWNhdGlvbiUyMEF1dGhvcml0eSUyMDIu
Y3J0MIHGBggrBgEFBQcwAoaBuWxkYXA6Ly8vQ049Q0VSTiUyMFJvb3QlMjBDZXJ0
aWZpY2F0aW9uJTIwQXV0aG9yaXR5JTIwMixDTj1BSUEsQ049UHVibGljJTIwS2V5
JTIwU2VydmljZXMsQ049U2VydmljZXMsQ049Q29uZmlndXJhdGlvbixEQz1jZXJu
LERDPWNoP2NBQ2VydGlmaWNhdGU/YmFzZT9vYmplY3RDbGFzcz1jZXJ0aWZpY2F0
aW9uQXV0aG9yaXR5MA0GCSqGSIb3DQEBDQUAA4ICAQAv56iMPo0VUkrHxPYLjfyW
IL/TmYxxYldO8kCTKXyaRO4ZmwD6JjLaclTgSHz7gOKFL35ZF0Rv4nWk/ZJBl+dU
1udgBjF/uKK0v0m+7iEIOG0HORCCQCDgayuiLomI5eQp8KTgHrswHWL+ESxa3Hdv
vr7GBG/7EhrYwstm/tOJ8cKaeiooSxHw5Lgsqq229SxfO8fSyS8DAa5eUdWT/dVU
RDR8lGQShx4R9JOHSDg0y6rE7V0cw/BO3NQuaxMunFXkQprtWneJfR4uugMOKk/v
tMhQGCDB7o3CVhLGSb+76Tny+eSa2g+Zv17PGVfhnF9oynkCII+shX9TmOUsDEnS
7MWES58YwnpBZrxdeJVPEzVVuYEZP4QsLrIL1ynFqBwFAnPU48Hs6s+kOI/9BFJz
v+Fp/iw8BZSOclpJzA5rkW6yQ7LVfjFBV1CgyhO8GH5jhYBd5ZLvG8eLNm8Gpt+H
n30awoaDoMuHcGS5B6NOZLfwE+suTxMw8pjHhKXx7RkSoeZy72PinlbWn1tWLiPa
UMdkrb/WHOdMKaadQTDO/VyibBL49iJ8BAlERgIl9QaRDLjAIdD45rLdBe95HxSl
zpZqsxuI09eJ8+iLFJhTDH2BODoEuqbn6PB/5z2d5zuG5sr85Vzn81ddapuUT9Ra
/dB5eJQeFZ0WjtUOO3gS/A==
-----END CERTIFICATE-----

7 changes: 7 additions & 0 deletions swan-cern/templates/fluentd/fluentd_ca.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ConfigMap carrying the CA bundle shipped with the chart
# (files/ca-certs/ca-bundle.crt).
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluentd-ca
  namespace: {{ .Release.Namespace }}
data:
# AsConfig renders each globbed file as a "<file name>: <content>" entry;
# "indent 2" then nests those entries under "data", which is why the
# template action itself starts at column 0.
{{ (.Files.Glob "files/ca-certs/ca-bundle.crt").AsConfig | indent 2 }}
131 changes: 131 additions & 0 deletions swan-cern/templates/fluentd/fluentd_filters.conf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# ConfigMap holding the "filters" stage of the fluentd pipeline:
# tags the collected records, extracts metric fields from matching log
# lines and decorates every record before it reaches the outputs stage.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluentd-filters-conf-{{ include "fluentd.shortReleaseName" . }}
  namespace: {{ .Release.Namespace }}
data:
  filters.conf: |-
    # Initial tagging of logs:
    # - user: user session pods
    # - hub: hub pod
    # - logs: other pods
    <match kubernetes.**>
      @type rewrite_tag_filter
      capitalize_regex_backreference true
      <rule>
        key $.kubernetes.labels.component
        pattern /^singleuser-server$/
        tag user
      </rule>
      <rule>
        key $.kubernetes.labels.component
        pattern /^hub$/
        tag hub
      </rule>
      <rule>
        key $.kubernetes.pod_name
        pattern /.?/
        tag logs
      </rule>
    </match>
    # Retag user and hub logs to:
    # - metrics: jupyter and jupyterhub custom log messages, used to emit metrics
    # - logs: rest of log messages
    <match {user,hub}>
      @type rewrite_tag_filter
      capitalize_regex_backreference true
      <rule>
        key $.log
        pattern /^.*?user: .*?, host: .*?, metric: .*?, value: .*$/
        tag metrics
      </rule>
      <rule>
        key $.log
        pattern /.?/
        tag logs
      </rule>
    </match>
    # Extract relevant information from metric logs:
    # - user
    # - host
    # - metric key: name of the metric
    # - metric context (optional): additional qualifiers for the metric
    # - metric value
    <filter {metrics}>
      @type parser
      key_name log
      reserve_data true
      emit_invalid_record_to_error true
      <parse>
        @type grok
        # keep unmatched logs
        grok_failure_key grokfailure
        <grok>
          # messages printed with "metric: metric_key.metric_context", where metric_context can have dots
          pattern user: %{DATA:_metric_user_}, host: %{DATA}, metric: (?<_metric_key_>(\w*))(\.)(?<_metric_context_>(.*)), value: %{GREEDYDATA:_metric_value_}
        </grok>
        <grok>
          # messages printed with "metric: metric_key"
          pattern user: %{DATA:_metric_user_}, host: %{DATA}, metric: (?<_metric_key_>(.*))(?<_metric_context_>(.*)), value: %{GREEDYDATA:_metric_value_}
        </grok>
      </parse>
    </filter>
    # Store extracted metric information in a new "metrics" field, as part of the JSON
    # of the log message. Use metric key as first attribute under "metrics".
    # Format: metrics.metric_key.user|context|value
    <filter {metrics}>
      @type record_transformer
      enable_ruby
      <record>
        metrics.${record['_metric_key_']}.user ${record['_metric_user_']}
        metrics.${record['_metric_key_']}.context ${record['_metric_context_']}
        metrics.${record['_metric_key_']}.value ${record['_metric_value_']}
        # the below field is needed to run queries that group values by the last segment of the metric context (segments split by '.')
        # which looks like "metrics.{record['_metric_key_']}_lastelementofmetric_context = {record['_metric_value_']}"
        # for example, for a record with below fields
        # "data.metrics.spawn.context = LCG_xxx.some_spark_cluster.exception_class"
        # "data.metrics.spawn.value = None"
        # it emits a field "data.metrics.spawn_exception_class = None"
        metrics.${record['_metric_key_']}_${record['_metric_context_'].gsub('-','_').split('.')[-1]} ${record['_metric_value_']}
      </record>
      remove_keys ["_metric_user_", "_metric_key_", "_metric_context_", "_metric_value_"]
    </filter>
    # Emit general log records with type "logs"
    # Accessible in OpenSearch with pattern monit_private_swan_logs_logs
    <filter {logs}>
      @type record_transformer
      <record>
        type "logs"
      </record>
    </filter>
    # Emit metric log records with type "metrics"
    # Accessible in OpenSearch with pattern monit_private_swan_logs_metrics
    <filter {metrics}>
      @type record_transformer
      <record>
        type "metrics"
      </record>
    </filter>
    # Set MONIT producer for all logs
    # Keep raw log data in "raw" field
    <filter {logs,metrics}>
      @type record_transformer
      enable_ruby
      <record>
        timestamp ${(time.to_f * 1000).to_i}
        producer {{ .Values.fluentd.output.producer }}
        raw ${record["log"]} # field named raw is indexed by MONIT/OpenSearch
      </record>
      remove_keys ["log"]
    </filter>
24 changes: 24 additions & 0 deletions swan-cern/templates/fluentd/fluentd_outputs.conf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# ConfigMap holding the "outputs" stage of the fluentd pipeline:
# ships records tagged "logs" or "metrics" to the configured HTTP endpoint
# and prints pipeline errors on the fluentd pod's stdout.
# NOTE(review): the basic-auth password below is rendered in clear text into
# this ConfigMap; consider sourcing it from a Secret instead.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluentd-outputs-conf-{{ include "fluentd.shortReleaseName" . }}
  namespace: {{ .Release.Namespace }}
data:
  outputs.conf: |-
    # Push logs to MONIT endpoint
    <match {logs,metrics}>
      @type http
      endpoint_url {{ .Values.fluentd.output.endpoint }}
      serializer json
      http_method post
      cacert_file {{ .Values.fluentd.output.cacert }}
      authentication basic
      username {{ .Values.fluentd.output.username }}
      password {{ .Values.fluentd.output.password }}
    </match>
    # Forward errors in this pipeline to fluentd pod stdout for debugging
    <label @ERROR>
      <match **>
        @type stdout
      </match>
    </label>
40 changes: 40 additions & 0 deletions swan-cern/templates/fluentd/fluentd_sources.conf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# ConfigMap holding the "sources" stage of the fluentd pipeline:
# tails the container log files, drops fluentd's own logs (and, unless
# fluentd.output.includeInternal is set, kube-system logs) and enriches
# the remaining records with Kubernetes metadata.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluentd-sources-conf-{{ include "fluentd.shortReleaseName" . }}
  namespace: {{ .Release.Namespace }}
data:
  sources.conf: |-
    # Do not collect fluentd logs to avoid infinite loops.
    <match kubernetes.var.log.containers.**fluentd**>
      @type null
    </match>
    {{ if not .Values.fluentd.output.includeInternal }}
    <match kubernetes.var.log.containers.**_kube-system_**>
      @type null
    </match>
    {{ end }}
    # Read from container logs
    <source>
      @type tail
      @id in_tail_container_logs
      path /var/log/containers/*.log
      pos_file /var/log/fluentd-containers.log.pos
      tag kubernetes.*
      read_from_head true
      <parse>
        @type regexp
        expression /^(?<time>.+) (?<stream>stdout|stderr) [^ ]* (?<log>.*)$/
        time_format '%Y-%m-%dT%H:%M:%S.%NZ'
        keep_time_key false
      </parse>
      emit_unmatched_lines true
    </source>
    # Add some basic kubernetes metadata (e.g. pod name) to the logs
    <filter kubernetes.**>
      @type kubernetes_metadata
      @id filter_kube_metadata
    </filter>
Loading

0 comments on commit abe7bdd

Please sign in to comment.