Skip to content

Commit

Permalink
Zuul/Nodepool/ZK - extend internal certificate validity duration
Browse files Browse the repository at this point in the history
We are issuing those internal certificates using cert-manager for
the ZK <-> Zuul/Nodepool SSL communication.

The default validity for them was 3 months with a renewal after 2 months.
The issue is that we need to handle the restart of Nodepool/Zuul and
ZK when the renewal is done by cert-manager.

This patch proposes to avoid that burden by extending the validity
to a very long duration.

If for whatever reason the local CA and the certificates it issued
need to be rotated, check out the sf-operator documentation
to read about the process.

Change-Id: I1bc1885589e11f8300a2d46fe9e53737c5407398
  • Loading branch information
morucci committed Sep 11, 2024
1 parent b986db0 commit 3c78918
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 18 deletions.
13 changes: 8 additions & 5 deletions controllers/libs/cert/certificates.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,23 @@ const (
LocalCACertSecretName = "ca-cert"
)

var EonDuration, _ = time.ParseDuration("219000h") // 25 years

func MkBaseCertificate(name string, ns string, issuerName string,
dnsNames []string, secretName string, isCA bool, duration time.Duration,
usages []certv1.KeyUsage, commonName *string,
privateKey *certv1.CertificatePrivateKey) certv1.Certificate {
renewBefore, _ := time.ParseDuration("168h") // 7 days
cert := certv1.Certificate{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: ns,
},
Spec: certv1.CertificateSpec{
DNSNames: dnsNames,
Duration: &metav1.Duration{Duration: duration},
SecretName: secretName,
DNSNames: dnsNames,
Duration: &metav1.Duration{Duration: EonDuration},
RenewBefore: &metav1.Duration{Duration: renewBefore},
SecretName: secretName,
IssuerRef: cmmeta.ObjectReference{
Kind: "Issuer",
Name: issuerName,
Expand All @@ -48,8 +52,7 @@ func MkBaseCertificate(name string, ns string, issuerName string,
}

func MkCertificate(name string, ns string, issuerName string,
dnsNames []string, secretName string, privateKey *certv1.CertificatePrivateKey) certv1.Certificate {
duration, _ := time.ParseDuration("2160h") // 3 months (default)
dnsNames []string, secretName string, privateKey *certv1.CertificatePrivateKey, duration time.Duration) certv1.Certificate {
usages := []certv1.KeyUsage{
certv1.UsageServerAuth,
certv1.UsageClientAuth,
Expand Down
4 changes: 2 additions & 2 deletions controllers/nodepool.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ func (r *SFController) DeployNodepoolBuilder(statsdExporterVolume apiv1.Volume,
"statsd_mapping": utils.Checksum([]byte(nodepoolStatsdMappingConfig)),
"image": base.NodepoolBuilderImage(),
"nodepool-providers-secrets": getSecretsVersion(providersSecrets, providerSecretsExists),
"serial": "15",
"serial": "16",
"corporate-ca-certs-version": getCMVersion(corporateCM, corporateCMExists),
}

Expand Down Expand Up @@ -713,7 +713,7 @@ func (r *SFController) DeployNodepoolLauncher(statsdExporterVolume apiv1.Volume,
"nodepool.yaml": utils.Checksum([]byte(generateConfigScript)),
"nodepool-logging.yaml": utils.Checksum([]byte(loggingConfig)),
"statsd_mapping": utils.Checksum([]byte(nodepoolStatsdMappingConfig)),
"serial": "10",
"serial": "11",
// When the Secret ResourceVersion field change (when edited) we force a nodepool-launcher restart
"image": base.NodepoolLauncherImage(),
"nodepool-providers-secrets": getSecretsVersion(providersSecrets, providerSecretsExists),
Expand Down
18 changes: 18 additions & 0 deletions controllers/softwarefactory_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,24 @@ func isOperatorReady(services map[string]bool) bool {

// cleanup ensures removal of legacy resources
func (r *SFController) cleanup() {

caCert := certv1.Certificate{}
if r.GetM(cert.LocalCACertSecretName, &caCert) {
// Here we are detecting the previous version duration to ensure we have to run the cleanup
prevDuration, _ := time.ParseDuration("87600h") // 10y
if caCert.Spec.Duration.Duration.String() == prevDuration.String() {
for _, name := range []string{"zookeeper-server", "zookeeper-client", "ca-cert"} {
// remove invalid certificate resource
r.DeleteR(&certv1.Certificate{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Namespace: r.ns,
},
})
}
}
}

// remove a legacy Route definition for gateway
r.DeleteR(&apiroutev1.Route{
ObjectMeta: metav1.ObjectMeta{
Expand Down
6 changes: 2 additions & 4 deletions controllers/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ import (
"sort"
"strconv"
"strings"
"time"

"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand Down Expand Up @@ -122,7 +121,7 @@ func (r *SFUtilContext) UpdateR(obj client.Object) bool {
return true
}

// PatchR delete a resource.
// PatchR patches a resource.
func (r *SFUtilContext) PatchR(obj client.Object, patch client.Patch) {
if err := r.Client.Patch(r.ctx, obj, patch); err != nil {
panic(err.Error())
Expand Down Expand Up @@ -283,10 +282,9 @@ func (r *SFUtilContext) EnsureLocalCA() {
// https://cert-manager.io/docs/configuration/selfsigned/#bootstrapping-ca-issuers
selfSignedIssuer := cert.MkSelfSignedIssuer("selfsigned-issuer", r.ns)
CAIssuer := cert.MkCAIssuer("ca-issuer", r.ns)
duration, _ := time.ParseDuration("87600h") // 10y
commonName := "cacert"
rootCACertificate := cert.MkBaseCertificate(cert.LocalCACertSecretName, r.ns, "selfsigned-issuer", []string{"caroot"},
cert.LocalCACertSecretName, true, duration, nil, &commonName, nil)
cert.LocalCACertSecretName, true, cert.EonDuration, nil, &commonName, nil)
r.GetOrCreate(&selfSignedIssuer)
r.GetOrCreate(&CAIssuer)
r.GetOrCreate(&rootCACertificate)
Expand Down
1 change: 1 addition & 0 deletions controllers/weeder.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ func (r *SFController) EnsureZuulWeeder(checksum string) bool {

annotations := map[string]string{
"zuul-conf": checksum,
"serial": "1",
}

dep := base.MkDeployment(ident, r.ns, "quay.io/software-factory/zuul-weeder:latest", r.cr.Spec.ExtraLabels)
Expand Down
6 changes: 3 additions & 3 deletions controllers/zookeeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ func (r *SFController) DeployZookeeper() bool {
Encoding: certv1.PKCS8,
}
certificate := cert.MkCertificate(
"zookeeper-server", r.ns, "ca-issuer", dnsNames, "zookeeper-server-tls", &privateKey)
"zookeeper-server", r.ns, "ca-issuer", dnsNames, "zookeeper-server-tls", &privateKey, cert.EonDuration)
certClient := cert.MkCertificate(
"zookeeper-client", r.ns, "ca-issuer", dnsNames, "zookeeper-client-tls", &privateKey)
"zookeeper-client", r.ns, "ca-issuer", dnsNames, "zookeeper-client-tls", &privateKey, cert.EonDuration)
r.GetOrCreate(&certificate)
r.GetOrCreate(&certClient)

Expand All @@ -101,7 +101,7 @@ func (r *SFController) DeployZookeeper() bool {
annotations := map[string]string{
"configuration": utils.Checksum([]byte(configChecksumable)),
"image": base.ZookeeperImage(),
"serial": "4",
"serial": "5",
}

volumeMountsStatsExporter := []apiv1.VolumeMount{
Expand Down
8 changes: 4 additions & 4 deletions controllers/zuul.go
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ func (r *SFController) EnsureZuulScheduler(cfg *ini.File) bool {
"statsd-image": base.StatsdExporterImage(),
"node-exporter-image": base.NodeExporterImage(),
"statsd_mapping": utils.Checksum([]byte(zuulStatsdMappingConfig)),
"serial": "9",
"serial": "10",
"zuul-logging": utils.Checksum([]byte(r.getZuulLoggingString("zuul-scheduler"))),
"zuul-extra": utils.Checksum([]byte(sshConfig)),
"zuul-connections": utils.IniSectionsChecksum(cfg, utils.IniGetSectionNamesByPrefix(cfg, "connection")),
Expand Down Expand Up @@ -563,7 +563,7 @@ func (r *SFController) EnsureZuulExecutor(cfg *ini.File) bool {
"zuul-common-config": utils.IniSectionsChecksum(cfg, commonIniConfigSections),
"zuul-component-config": utils.IniSectionsChecksum(cfg, sections),
"zuul-image": getZuulImage("zuul-executor"),
"serial": "7",
"serial": "8",
"zuul-logging": utils.Checksum([]byte(r.getZuulLoggingString("zuul-executor"))),
"zuul-connections": utils.IniSectionsChecksum(cfg, utils.IniGetSectionNamesByPrefix(cfg, "connection")),
"corporate-ca-certs-version": getCMVersion(corporateCM, corporateCMExists),
Expand Down Expand Up @@ -640,7 +640,7 @@ func (r *SFController) EnsureZuulMerger(cfg *ini.File) bool {
"zuul-common-config": utils.IniSectionsChecksum(cfg, commonIniConfigSections),
"zuul-component-config": utils.IniSectionsChecksum(cfg, sections),
"zuul-image": getZuulImage(service),
"serial": "5",
"serial": "6",
"zuul-connections": utils.IniSectionsChecksum(cfg, utils.IniGetSectionNamesByPrefix(cfg, "connection")),
"zuul-logging": utils.Checksum([]byte(r.getZuulLoggingString("zuul-merger"))),
"corporate-ca-certs-version": getCMVersion(corporateCM, corporateCMExists),
Expand Down Expand Up @@ -705,7 +705,7 @@ func (r *SFController) EnsureZuulWeb(cfg *ini.File) bool {
"zuul-common-config": utils.IniSectionsChecksum(cfg, commonIniConfigSections),
"zuul-component-config": utils.IniSectionsChecksum(cfg, sections),
"zuul-image": getZuulImage("zuul-web"),
"serial": "6",
"serial": "7",
"zuul-logging": utils.Checksum([]byte(r.getZuulLoggingString("zuul-web"))),
"zuul-connections": utils.IniSectionsChecksum(cfg, utils.IniGetSectionNamesByPrefix(cfg, "connection")),
"corporate-ca-certs-version": getCMVersion(corporateCM, corporateCMExists),
Expand Down
26 changes: 26 additions & 0 deletions doc/deployment/backing_services.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,29 @@ MariaDB is deployed as a single-pod statefulset.
ZooKeeper coordinates data and configurations between all the Zuul and Nodepool microservices.

ZooKeeper is deployed as a single-pod statefulset.

### Certificates

Zuul and Nodepool services authenticate to ZooKeeper using an X.509 client certificate. `sf-operator` manages a local Certificate Authority based on the `cert-manager` operator facilities to issue server and client certificates. Those certificates are set with a long validity period (25 years), and an operator might want to rotate them for security reasons. To do so:

Delete `Certificate` resources named:

- zookeeper-server
- zookeeper-client
- ca-cert

Roll out the following `StatefulSet` and `Deployment` resources:

- zookeeper
- zuul-scheduler
- zuul-merger
- zuul-executor
- zuul-web
- weeder
- nodepool-builder
- nodepool-launcher

Then make sure the `Reconcile` loop of the `sf-operator` is triggered:

- if using the `standalone` deployment mode, run the `standalone` command
- if using the `OLM` deployment mode, the `Reconcile` will happen automatically
3 changes: 3 additions & 0 deletions doc/reference/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ All notable changes to this project will be documented in this file.
### Added
### Removed
### Changed

- zookeeper - increase certificate validity duration to 25 years to avoid renewal burden

### Fixed
### Security

Expand Down

0 comments on commit 3c78918

Please sign in to comment.