Skip to content

Commit

Permalink
Merge pull request #985 from k8s-infra-cherrypick-robot/cherry-pick-9…
Browse files Browse the repository at this point in the history
…73-to-release-1.6

[release-1.6] Create standalone lock release controller
  • Loading branch information
k8s-ci-robot authored Nov 2, 2024
2 parents a25bec2 + b1bc9eb commit f44b9ca
Show file tree
Hide file tree
Showing 11 changed files with 523 additions and 181 deletions.
31 changes: 31 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
# Core Filestore CSI driver binary
DRIVERBINARY=gcp-filestore-csi-driver
WEBHOOKBINARY=gcp-filestore-csi-driver-webhook
LOCKRELEASEBINARY=gcp-filestore-csi-driver-lockrelease
$(info PULL_BASE_REF is $(PULL_BASE_REF))
$(info PWD is $(PWD))

Expand Down Expand Up @@ -45,6 +46,15 @@ else
endif
$(info WEBHOOK_STAGINGIMAGE is $(WEBHOOK_STAGINGIMAGE))

# Image repository that the lock release controller image is tagged with and
# pushed to. Set GCP_FS_CSI_LOCKRELEASE_STAGING_IMAGE to override it; otherwise
# it defaults to a repository under gcr.io/$(PROJECT).
LOCKRELEASE_STAGINGIMAGE=
ifdef GCP_FS_CSI_LOCKRELEASE_STAGING_IMAGE
LOCKRELEASE_STAGINGIMAGE=$(GCP_FS_CSI_LOCKRELEASE_STAGING_IMAGE)
else
LOCKRELEASE_STAGINGIMAGE=gcr.io/$(PROJECT)/gcp-filestore-csi-driver-lockrelease
endif
# Print the resolved value while the Makefile is being parsed.
$(info LOCKRELEASE_STAGINGIMAGE is $(LOCKRELEASE_STAGINGIMAGE))


BINDIR?=bin

# This flag is used only for csi-client and windows.
Expand Down Expand Up @@ -142,6 +152,27 @@ build-and-push-multi-arch: build-image-and-push-linux-arm64 build-image-and-push
docker manifest create --amend $(STAGINGIMAGE):$(STAGINGVERSION) $(STAGINGIMAGE):$(STAGINGVERSION)_linux_amd64 $(STAGINGIMAGE):$(STAGINGVERSION)_linux_arm64
docker manifest push -p $(STAGINGIMAGE):$(STAGINGVERSION)

# Build the go binary for the CSI driver lock release controller.
# The binary is written to ${BINDIR}/${LOCKRELEASEBINARY} and is built from
# ./cmd/lockrelease/ using the vendored dependencies (-mod=vendor), with cgo
# disabled and static external link flags.
# NOTE(review): -X main.version only has an effect if package main in
# ./cmd/lockrelease declares a string variable named "version" - confirm.
lockrelease:
mkdir -p ${BINDIR}
{ \
set -e ; \
CGO_ENABLED=0 go build -mod=vendor -a -ldflags '-X main.version=$(STAGINGVERSION) -extldflags "-static"' -o ${BINDIR}/${LOCKRELEASEBINARY} ./cmd/lockrelease/; \
}

# Build the docker image for the lock release controller.
# Runs after init-buildx, builds ./cmd/lockrelease/Dockerfile for linux/amd64
# only, tags the result $(LOCKRELEASE_STAGINGIMAGE):$(STAGINGVERSION) and
# pushes it as part of the build (--push).
# STAGINGVERSION is forwarded to the Dockerfile as a build argument.
lockrelease-image: init-buildx
{ \
set -e ; \
docker buildx build \
--platform linux/amd64 \
--build-arg STAGINGVERSION=$(STAGINGVERSION) \
--build-arg BUILDPLATFORM=linux/amd64 \
--build-arg TARGETPLATFORM=linux/amd64 \
-f ./cmd/lockrelease/Dockerfile \
-t $(LOCKRELEASE_STAGINGIMAGE):$(STAGINGVERSION) --push .; \
}

# Build the go binary for the CSI driver.
# STAGINGVERSION may contain multiple tags (e.g. canary, vX.Y.Z etc). Use one of the tags
# for setting the driver version variable. For convenience we are using the first value.
Expand Down
26 changes: 26 additions & 0 deletions cmd/lockrelease/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Copyright 2024 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build stage: compiles the lock release controller with the Go toolchain
# running on the build platform and cross-compiling for the target platform.
FROM --platform=$BUILDPLATFORM golang:1.22.6 AS builder

# Build arguments must be declared inside the stage that uses them. Without
# this declaration the --build-arg STAGINGVERSION value is never consumed and
# ${STAGINGVERSION} in the RUN line below expands to an empty string.
ARG STAGINGVERSION
ARG TARGETPLATFORM

WORKDIR /go/src/sigs.k8s.io/gcp-filestore-csi-driver
ADD . .
# GOARCH is the second "/"-separated field of TARGETPLATFORM (e.g. linux/amd64 -> amd64).
RUN GOARCH=$(echo $TARGETPLATFORM | cut -f2 -d '/') make lockrelease BINDIR=/bin GCP_FS_CSI_STAGING_VERSION=${STAGINGVERSION}

# Runtime stage: only the statically linked binary on a distroless base.
FROM gcr.io/distroless/static
ARG LOCKRELEASEBINARY=gcp-filestore-csi-driver-lockrelease
COPY --from=builder /bin/${LOCKRELEASEBINARY} /${LOCKRELEASEBINARY}
# ENTRYPOINT is not subject to build-arg substitution, so the path is spelled
# out and must match the default value of LOCKRELEASEBINARY above.
ENTRYPOINT ["/gcp-filestore-csi-driver-lockrelease"]
122 changes: 122 additions & 0 deletions cmd/lockrelease/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"
"flag"
"time"

"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"

"k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"
releaselock "sigs.k8s.io/gcp-filestore-csi-driver/pkg/releaselock"
"sigs.k8s.io/gcp-filestore-csi-driver/pkg/util"
)

// Command-line flags of the standalone lock release controller.
//
// Every duration flag is registered with flag.Duration, so values given on
// the command line must carry a unit (for example "3600s" or "1h"); a bare
// number such as "3600" is rejected by the flag package.
var (
	// Sync period of the lock release controller.
	lockReleaseSyncPeriod = flag.Duration("lock-release-sync-period", 3600*time.Second, "Duration, in seconds, the sync period of the lock release controller. Defaults to 3600 seconds.")

	// Diagnostics HTTP server settings (metrics and leader election health check).
	httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the HTTP server for diagnostics, including metrics and leader election health check, will listen (example: `:8080`). The default is empty string.")
	metricsPath  = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")

	// Leader election timings.
	leaderElectionLeaseDuration = flag.Duration("leader-election-lease-duration", 15*time.Second, "Duration, in seconds, that non-leader candidates will wait to force acquire leadership. Defaults to 15 seconds.")
	leaderElectionRenewDeadline = flag.Duration("leader-election-renew-deadline", 10*time.Second, "Duration, in seconds, that the acting leader will retry refreshing leadership before giving up. Defaults to 10 seconds.")
	leaderElectionRetryPeriod   = flag.Duration("leader-election-retry-period", 5*time.Second, "Duration, in seconds, the LeaderElector clients should wait between tries of actions. Defaults to 5 seconds.")

	// Work queue rate limiter bounds.
	workQueueRateLimiterBaseDelay = flag.Duration("rate-limiter-base-delay", 5*time.Millisecond, "Base delay of the work queue rate limiter. Default is 5ms.")
	workQueueRateLimiterMaxDelay  = flag.Duration("rate-limiter-max-delay", 1000*time.Second, "Max delay of the work queue rate limiter. Default is 1000s.")
)

// main wires up and runs the standalone Filestore lock release controller.
//
// It builds an in-cluster Kubernetes client, creates the controller together
// with a node informer that forwards node create/update events to it, and
// then blocks in leader election. The informer factory and the controller
// are started only from the OnStartedLeading callback, i.e. once this
// process holds the lease.
func main() {
	klog.InitFlags(nil)
	flag.Parse()
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	config, err := rest.InClusterConfig()
	if err != nil {
		klog.Fatalf("Failed to create an in cluster config: %v", err)
	}
	client, err := kubernetes.NewForConfig(config)
	if err != nil {
		// kubernetes.NewForConfig builds a clientset, not a discovery client.
		klog.Fatalf("Failed to create a new Kubernetes clientset: %v", err)
	}
	lockReleaseConfig := &releaselock.LockReleaseControllerConfig{
		LeaseDuration:                 *leaderElectionLeaseDuration,
		RenewDeadline:                 *leaderElectionRenewDeadline,
		RetryPeriod:                   *leaderElectionRetryPeriod,
		SyncPeriod:                    *lockReleaseSyncPeriod,
		WorkQueueRateLimiterBaseDelay: *workQueueRateLimiterBaseDelay,
		WorkQueueRateLimiterMaxDelay:  *workQueueRateLimiterMaxDelay,
		MetricEndpoint:                *httpEndpoint,
		MetricPath:                    *metricsPath,
	}
	// The sync period doubles as the resync period of the shared informers.
	factory := informers.NewSharedInformerFactory(client, lockReleaseConfig.SyncPeriod)
	nodeInformer := factory.Core().V1().Nodes().Informer()

	c, err := releaselock.NewLockReleaseController(client, lockReleaseConfig, &nodeInformer)
	if err != nil {
		klog.Fatalf("Failed to create a lock release controller: %v", err)
	}

	// Forward node create and update events to the controller. No DeleteFunc
	// is registered, so node deletions are not forwarded from here.
	nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{
		AddFunc: func(obj interface{}) {
			klog.Infof("Node informer received node create event. %v", obj)
			c.EnqueueCreateEventObject(obj)
		},
		UpdateFunc: func(oldObj, newObj interface{}) {
			klog.Infof("Node informer received node update event. old %v, new %v", oldObj, newObj)
			c.EnqueueUpdateEventObject(oldObj, newObj)
		},
	})

	// run is invoked by the leader elector once leadership is acquired.
	run := func(ctx context.Context) {
		klog.Infof("Lock release controller %s started leading on node %s", c.GetId(), c.GetHost())
		factory.Start(ctx.Done())
		c.Run(ctx)
	}

	// Lease-based lock; the core (ConfigMap/Endpoints) client argument is left
	// nil and only the coordination client is supplied.
	rl, err := resourcelock.New(
		resourcelock.LeasesResourceLock,
		util.ManagedFilestoreCSINamespace,
		releaselock.LeaseName,
		nil,
		c.GetClient().CoordinationV1(),
		resourcelock.ResourceLockConfig{
			Identity: c.GetId(),
		})
	if err != nil {
		klog.Fatalf("Error creating resourcelock: %v", err)
	}

	// Use leader election, so that during rolling upgrade, only one of this controller and the old version lock release controller
	// is running.
	leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
		Lock:          rl,
		LeaseDuration: lockReleaseConfig.LeaseDuration,
		RenewDeadline: lockReleaseConfig.RenewDeadline,
		RetryPeriod:   lockReleaseConfig.RetryPeriod,
		Callbacks: leaderelection.LeaderCallbacks{
			OnStartedLeading: run,
			// Losing the lease is fatal: the process exits rather than
			// continuing to run without leadership.
			OnStoppedLeading: func() {
				klog.Fatalf("%s no longer the leader", c.GetId())
			},
		},
	})
}
4 changes: 1 addition & 3 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,7 @@ var (
resourceTagsStr = flag.String("resource-tags", "", "Resource tags to attach to each volume created. It is a comma separated list of tags of the form '<parentID_1>/<tagKey_1>/<tagValue_1>...<parentID_N>/<tagKey_N>/<tagValue_N>' where, parentID is the ID of Organization or Project resource where tag key and value resources exist, tagKey is the shortName of the tag key resource, tagValue is the shortName of the tag value resource. See https://cloud.google.com/resource-manager/docs/tags/tags-creating-and-managing for more details.")

// Feature lock release specific parameters, only take effect when feature-lock-release is set to true.
featureLockRelease = flag.Bool("feature-lock-release", false, "if set to true, the node driver will support Filestore lock release.")
lockReleaseSyncPeriod = flag.Duration("lock-release-sync-period", 60*time.Second, "Duration, in seconds, the sync period of the lock release controller. Defaults to 60 seconds.")
featureLockRelease = flag.Bool("feature-lock-release", false, "if set to true, the node driver will support Filestore lock release.")

// Feature configurable shares per Filestore instance specific parameters.
featureMaxSharePerInstance = flag.Bool("feature-max-shares-per-instance", false, "If this feature flag is enabled, allows the user to configure max shares packed per Filestore instance")
Expand Down Expand Up @@ -169,7 +168,6 @@ func main() {
LeaseDuration: *leaderElectionLeaseDuration,
RenewDeadline: *leaderElectionRenewDeadline,
RetryPeriod: *leaderElectionRetryPeriod,
SyncPeriod: *lockReleaseSyncPeriod,
MetricEndpoint: *httpEndpoint,
MetricPath: *metricsPath,
},
Expand Down
44 changes: 27 additions & 17 deletions deploy/kubernetes/overlays/lockrelease/configmap_rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ metadata:
name: gke-managed-filestorecsi

---
##### Lock release controller Service Account, Roles, RoleBindings
apiVersion: v1
kind: ServiceAccount
metadata:
name: filestore-lockrelease-controller-sa
namespace: gcp-filestore-csi-driver
---

kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
Expand All @@ -16,21 +23,6 @@ rules:

---

kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: filestorecsi-node-driver-cluster-role-binding
subjects:
- kind: ServiceAccount
name: gcp-filestore-csi-node-sa
namespace: gcp-filestore-csi-driver
roleRef:
kind: ClusterRole
name: filestorecsi-node-driver-cluster-role
apiGroup: rbac.authorization.k8s.io

---

kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
Expand All @@ -39,7 +31,7 @@ metadata:
rules:
- apiGroups: [""]
resources: ["configmaps"]
verbs: ["get", "list", "update", "create"]
verbs: ["get", "update", "create"]

---

Expand All @@ -55,14 +47,29 @@ rules:

---

kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: filestorecsi-lockrelease-controller-cluster-role-binding
subjects:
- kind: ServiceAccount
name: filestore-lockrelease-controller-sa
namespace: gcp-filestore-csi-driver
roleRef:
kind: ClusterRole
name: filestorecsi-node-driver-cluster-role
apiGroup: rbac.authorization.k8s.io

---

kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: filestorecsi-node-leaderelection-binding
namespace: gke-managed-filestorecsi
subjects:
- kind: ServiceAccount
name: gcp-filestore-csi-node-sa
name: filestore-lockrelease-controller-sa
namespace: gcp-filestore-csi-driver
roleRef:
kind: Role
Expand All @@ -80,6 +87,9 @@ subjects:
- kind: ServiceAccount
name: gcp-filestore-csi-node-sa
namespace: gcp-filestore-csi-driver
- kind: ServiceAccount
name: filestore-lockrelease-controller-sa
namespace: gcp-filestore-csi-driver
roleRef:
kind: Role
name: filestorecsi-node-driver-role
Expand Down
2 changes: 1 addition & 1 deletion deploy/kubernetes/overlays/lockrelease/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ kind: Kustomization
resources:
- ../stable-master
- configmap_rbac.yaml

- lock_release_controller.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Deployment of the standalone Filestore lock release controller.
# Runs a single replica in the gcp-filestore-csi-driver namespace under the
# filestore-lockrelease-controller-sa service account.
apiVersion: apps/v1
kind: Deployment
metadata:
name: filestore-lock-release-controller
namespace: gcp-filestore-csi-driver
labels:
k8s-app: filestore-lock-release-controller
spec:
replicas: 1
selector:
matchLabels:
k8s-app: filestore-lock-release-controller
template:
metadata:
labels:
k8s-app: filestore-lock-release-controller
component: filestorecsi
spec:
priorityClassName: csi-gcp-fs-node
nodeSelector:
kubernetes.io/os: linux
containers:
- name: filestore-lock-release-controller
image: registry.k8s.io/sig-storage/filestore-lockrelease-controller
args:
# Only the log verbosity is set here; all other controller flags keep their defaults.
- --v=6
resources:
requests:
cpu: 5m
memory: 10Mi
serviceAccountName: filestore-lockrelease-controller-sa
# Allows scheduling onto nodes tainted kubernetes.io/arch=arm64:NoSchedule.
# NOTE(review): confirm the referenced image is published for arm64 as well.
tolerations:
- key: "kubernetes.io/arch"
operator: "Equal"
value: "arm64"
effect: "NoSchedule"
4 changes: 0 additions & 4 deletions pkg/csi_driver/gcfs_driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,10 +326,6 @@ func (driver *GCFSDriver) Run(endpoint string) {
// Start the nonblocking GRPC.
s := NewNonBlockingGRPCServer()
s.Start(endpoint, driver.ids, driver.cs, driver.ns)
if driver.config.RunNode && driver.config.FeatureOptions.FeatureLockRelease.Enabled {
// Start the lock release controller on node driver.
driver.ns.(*nodeServer).lockReleaseController.Run(context.Background())
}
s.Wait()
}

Expand Down
3 changes: 2 additions & 1 deletion pkg/csi_driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ var (
)

// nodeServer handles mounting and unmounting of GCFS volumes on a node
// TODO(b/375481562): refactor config map utils & remove node driver's dependency on lockReleaseController
type nodeServer struct {
driver *GCFSDriver
mounter mount.Interface
Expand All @@ -76,7 +77,7 @@ func newNodeServer(driver *GCFSDriver, mounter mount.Interface, metaService meta
if err != nil {
return nil, err
}
lc, err := lockrelease.NewLockReleaseController(client, ns.features.FeatureLockRelease.Config)
lc, err := lockrelease.NewLockReleaseController(client, ns.features.FeatureLockRelease.Config, nil)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit f44b9ca

Please sign in to comment.