From c3c5765c8e77c0da38fa538f05feea133b25f5f7 Mon Sep 17 00:00:00 2001 From: Hiroto Funakoshi Date: Wed, 11 Sep 2024 11:02:56 +0900 Subject: [PATCH 1/5] Bugfix NGT flush logic (#2598) * fix: bugfix flush logic Signed-off-by: hlts2 * fix: nil check for flushing Signed-off-by: hlts2 * fix: add flush check logic Signed-off-by: hlts2 * fix: nil check bug Signed-off-by: hlts2 * fix: add nil check Signed-off-by: hlts2 * fix: return err when the flush process is executing Signed-off-by: hlts2 * fix: add error check for flushing Signed-off-by: hlts2 * fix: error message Signed-off-by: hlts2 * fix: disable kvs and vqueue initialization Signed-off-by: hlts2 * fix: disable commentout Signed-off-by: hlts2 * fix: disable kvs and vq Signed-off-by: hlts2 * fix: nil set to kvs and vq Signed-off-by: hlts2 * fix: copy ngt service object for flushing Signed-off-by: hlts2 * fix: deleted unnecessary nil check Signed-off-by: hlts2 * fix: variable name Signed-off-by: hlts2 --------- Signed-off-by: hlts2 Co-authored-by: Yusuke Kato --- pkg/agent/core/ngt/handler/grpc/index.go | 6 ++ pkg/agent/core/ngt/service/ngt.go | 73 +++++++++++++++--------- 2 files changed, 51 insertions(+), 28 deletions(-) diff --git a/pkg/agent/core/ngt/handler/grpc/index.go b/pkg/agent/core/ngt/handler/grpc/index.go index 62df71e180..8e7f24d9f6 100644 --- a/pkg/agent/core/ngt/handler/grpc/index.go +++ b/pkg/agent/core/ngt/handler/grpc/index.go @@ -64,6 +64,9 @@ func (s *server) CreateIndex( }, }, info.Get())...) code = codes.FailedPrecondition + case errors.Is(err, errors.ErrFlushingIsInProgress): + err = status.WrapWithAborted("CreateIndex API aborted to process create indexes request due to flushing indices is in progress", err, details...) + code = codes.Aborted case errors.Is(err, context.Canceled): err = status.WrapWithCanceled(fmt.Sprintf("CreateIndex API canceled to create indexes pool_size = %d, error: %v", c.GetPoolSize(), err), err, details...) 
code = codes.Canceled @@ -149,6 +152,9 @@ func (s *server) CreateAndSaveIndex( }, }, info.Get())...) code = codes.FailedPrecondition + case errors.Is(err, errors.ErrFlushingIsInProgress): + err = status.WrapWithAborted("CreateAndSaveIndex API aborted to process create indexes request due to flushing indices is in progress", err, details...) + code = codes.Aborted case errors.Is(err, context.Canceled): err = status.WrapWithCanceled(fmt.Sprintf("CreateAndSaveIndex API canceled to create indexes pool_size = %d, error: %v", c.GetPoolSize(), err), err, details...) code = codes.Canceled diff --git a/pkg/agent/core/ngt/service/ngt.go b/pkg/agent/core/ngt/service/ngt.go index 095f0b6f0a..09bf8577f5 100644 --- a/pkg/agent/core/ngt/service/ngt.go +++ b/pkg/agent/core/ngt/service/ngt.go @@ -260,6 +260,26 @@ func newNGT(cfg *config.NGT, opts ...Option) (n *ngt, err error) { return n, nil } +func (n *ngt) copyNGT(src *ngt) { + // instances + n.core = src.core + n.kvs = src.kvs + n.fmap = src.fmap + n.vq = src.vq + + // counters + n.wfci = src.wfci + n.nobic = src.nobic + n.nopvq = atomic.Uint64{} + + // paths + n.path = src.path + n.tmpPath = src.tmpPath + n.oldPath = src.oldPath + n.basePath = src.basePath + n.brokenPath = src.brokenPath +} + // migrate migrates the index directory from old to new under the input path if necessary. // Migration happens when the path is not empty and there is no `path/origin` directory, // which indicates that the user has NOT been using CoW mode and the index directory is not migrated yet. @@ -908,7 +928,7 @@ func (n *ngt) Start(ctx context.Context) <-chan error { } return ctx.Err() case <-tick.C: - if n.vq.IVQLen() >= n.alen { + if n.vq != nil && !n.IsFlushing() && n.vq.IVQLen() >= n.alen { err = n.CreateIndex(ctx, n.poolSize) } case <-limit.C: @@ -1242,14 +1262,12 @@ func (n *ngt) RegenerateIndexes(ctx context.Context) (err error) { if err != nil { log.Errorf("failed to flushing vector to ngt index in delete kvs. 
error: %v", err) } - n.kvs = nil - n.vq = nil // gc runtime.GC() atomic.AddUint64(&n.nogce, 1) - if n.inMem { + if !n.inMem { // delete file err = file.DeleteDir(ctx, n.path) if err != nil { @@ -1265,30 +1283,14 @@ func (n *ngt) RegenerateIndexes(ctx context.Context) (err error) { } } - nkvs := kvs.New(kvs.WithConcurrency(n.kvsdbConcurrency)) - - nvq, err := vqueue.New() - if err != nil { - log.Errorf("failed to create new vector vector queue. error: %v", err) - } - // renew instance nn, err := newNGT(n.cfg, n.opts...) if err != nil { return err } - nn.kvs = nkvs - nn.vq = nvq - - // Regenerate with flags set - nn.flushing.Store(true) - nn.indexing.Store(true) - defer nn.flushing.Store(false) - defer nn.indexing.Store(false) + n.copyNGT(nn) - n = nn - - return nil + return n.loadStatistics() } func (n *ngt) CreateIndex(ctx context.Context, poolSize uint32) (err error) { @@ -1299,8 +1301,11 @@ func (n *ngt) CreateIndex(ctx context.Context, poolSize uint32) (err error) { } }() - if n.isReadReplica { + switch { + case n.isReadReplica: return errors.ErrWriteOperationToReadReplica + case n.IsFlushing(): + return errors.ErrFlushingIsInProgress } ic := n.vq.IVQLen() + n.vq.DVQLen() @@ -1428,6 +1433,10 @@ func (n *ngt) CreateIndex(ctx context.Context, poolSize uint32) (err error) { return err } } + return n.loadStatistics() +} + +func (n *ngt) loadStatistics() error { if n.IsStatisticsEnabled() { log.Info("loading index statistics to cache") stats, err := n.core.GetGraphStatistics(core.AdditionalStatistics) @@ -1471,8 +1480,7 @@ func (n *ngt) CreateIndex(ctx context.Context, poolSize uint32) (err error) { IndegreeHistogram: stats.IndegreeHistogram, }) } - - return err + return nil } func (n *ngt) removeInvalidIndex(ctx context.Context) { @@ -1941,15 +1949,24 @@ func (n *ngt) gc() { } func (n *ngt) Len() uint64 { - return n.kvs.Len() + if n.kvs != nil && !n.IsFlushing() { + return n.kvs.Len() + } + return 0 } func (n *ngt) InsertVQueueBufferLen() uint64 { - return 
uint64(n.vq.IVQLen()) + if n.vq != nil && !n.IsFlushing() { + return uint64(n.vq.IVQLen()) + } + return 0 } func (n *ngt) DeleteVQueueBufferLen() uint64 { - return uint64(n.vq.DVQLen()) + if n.vq != nil && !n.IsFlushing() { + return uint64(n.vq.DVQLen()) + } + return 0 } func (n *ngt) GetDimensionSize() int { From 5e385d03a0835f04686127a12ed9ef0c805c27e1 Mon Sep 17 00:00:00 2001 From: Hiroto Funakoshi Date: Wed, 11 Sep 2024 16:23:33 +0900 Subject: [PATCH 2/5] fix: make command name (#2610) Signed-off-by: hlts2 --- Makefile.d/docker.mk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile.d/docker.mk b/Makefile.d/docker.mk index 858bd69e72..9a7ddd8a8a 100644 --- a/Makefile.d/docker.mk +++ b/Makefile.d/docker.mk @@ -38,7 +38,7 @@ docker/build: \ docker/build/index-save \ docker/build/loadtest \ docker/build/manager-index \ - docker/build/operator/helm \ + docker/build/helm-operator \ docker/build/readreplica-rotate .PHONY: docker/name/org @@ -255,13 +255,13 @@ docker/build/dev-container: IMAGE=$(DEV_CONTAINER_IMAGE) \ docker/build/image -.PHONY: docker/name/operator/helm -docker/name/operator/helm: +.PHONY: docker/name/helm-operator +docker/name/helm-operator: @echo "$(ORG)/$(HELM_OPERATOR_IMAGE)" -.PHONY: docker/build/operator/helm +.PHONY: docker/build/helm-operator ## build helm-operator image -docker/build/operator/helm: +docker/build/helm-operator: @make DOCKERFILE="$(ROOTDIR)/dockers/operator/helm/Dockerfile" \ IMAGE=$(HELM_OPERATOR_IMAGE) \ EXTRA_ARGS="--build-arg OPERATOR_SDK_VERSION=$(OPERATOR_SDK_VERSION) --build-arg UPX_OPTIONS=$(UPX_OPTIONS) $(EXTRA_ARGS)" \ From 52925b688ee3895105d1aa3c1d7d613ca78d8134 Mon Sep 17 00:00:00 2001 From: Kiichiro YUKAWA Date: Wed, 11 Sep 2024 16:38:06 +0900 Subject: [PATCH 3/5] Fix broken links in the document files (#2611) * :bug: :pencil: Fix broken links in document files Signed-off-by: vankichi * style: format code with Gofumpt and Prettier This commit fixes the style issues introduced in 
07bb715 according to the output from Gofumpt and Prettier. Details: https://github.com/vdaas/vald/pull/2611 --------- Signed-off-by: vankichi Co-authored-by: deepsource-autofix[bot] <62050782+deepsource-autofix[bot]@users.noreply.github.com> --- Makefile.d/k8s.mk | 1 + docs/api/insert.md | 6 +- docs/api/object.md | 6 +- docs/api/remove.md | 8 +- docs/api/search.md | 24 +-- docs/api/update.md | 6 +- docs/api/upsert.md | 6 +- docs/contributing/coding-style.md | 2 +- docs/performance/continuous-benchmark.md | 36 ++-- docs/troubleshooting/client-side.md | 2 +- docs/tutorial/get-started-with-faiss-agent.md | 2 +- docs/tutorial/vald-multicluster-on-k8s.md | 10 +- docs/user-guides/cluster-role-binding.md | 2 +- k8s/gateway/gateway/mirror/clusterrole.yaml | 55 ++++++ .../gateway/mirror/clusterrolebinding.yaml | 34 ++++ k8s/gateway/gateway/mirror/configmap.yaml | 28 +++ k8s/gateway/gateway/mirror/deployment.yaml | 184 ++++++++++++++++++ k8s/gateway/gateway/mirror/hpa.yaml | 35 ++++ k8s/gateway/gateway/mirror/pdb.yaml | 31 +++ k8s/gateway/gateway/mirror/priorityclass.yaml | 29 +++ .../gateway/mirror/serviceaccount.yaml | 26 +++ k8s/gateway/gateway/mirror/svc.yaml | 45 +++++ 22 files changed, 523 insertions(+), 55 deletions(-) create mode 100644 k8s/gateway/gateway/mirror/clusterrole.yaml create mode 100644 k8s/gateway/gateway/mirror/clusterrolebinding.yaml create mode 100644 k8s/gateway/gateway/mirror/configmap.yaml create mode 100644 k8s/gateway/gateway/mirror/deployment.yaml create mode 100644 k8s/gateway/gateway/mirror/hpa.yaml create mode 100644 k8s/gateway/gateway/mirror/pdb.yaml create mode 100644 k8s/gateway/gateway/mirror/priorityclass.yaml create mode 100644 k8s/gateway/gateway/mirror/serviceaccount.yaml create mode 100644 k8s/gateway/gateway/mirror/svc.yaml diff --git a/Makefile.d/k8s.mk b/Makefile.d/k8s.mk index 628fac13b5..5da9210129 100644 --- a/Makefile.d/k8s.mk +++ b/Makefile.d/k8s.mk @@ -41,6 +41,7 @@ k8s/manifest/update: \ --set 
manager.index.saver.enabled=true \ --set manager.index.creator.enabled=true \ --set manager.index.corrector.enabled=true \ + --set gateway.mirror.enabled=true \ --output-dir $(TEMP_DIR) \ charts/vald mkdir -p k8s/gateway diff --git a/docs/api/insert.md b/docs/api/insert.md index ef9e2dd43f..eae2bd7dce 100644 --- a/docs/api/insert.md +++ b/docs/api/insert.md @@ -100,7 +100,7 @@ Inset RPC is the method to add a new single vector. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -226,7 +226,7 @@ It's the recommended method to insert a large number of vectors. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -349,7 +349,7 @@ Please be careful that the size of the request exceeds the limit. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting diff --git a/docs/api/object.md b/docs/api/object.md index ca4b0e89c2..a9e37ea9d0 100644 --- a/docs/api/object.md +++ b/docs/api/object.md @@ -67,7 +67,7 @@ Exists RPC is the method to check that a vector exists in the `vald-agent`. | 5 | NOT_FOUND | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -148,7 +148,7 @@ GetObject RPC is the method to get the metadata of a vector inserted into the `v | 5 | NOT_FOUND | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -242,7 +242,7 @@ Each Upsert request and response are independent. 
| 5 | NOT_FOUND | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting diff --git a/docs/api/remove.md b/docs/api/remove.md index 156176fc70..2846ea08ee 100644 --- a/docs/api/remove.md +++ b/docs/api/remove.md @@ -101,7 +101,7 @@ Remove RPC is the method to remove a single vector. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -222,7 +222,7 @@ RemoveByTimestamp RPC is the method to remove vectors based on timestamp. | 5 | NOT_FOUND | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -344,7 +344,7 @@ It's the recommended method to remove a large number of vectors. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -465,7 +465,7 @@ Please be careful that the size of the request exceeds the limit. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting diff --git a/docs/api/search.md b/docs/api/search.md index dab76e0cec..ccec2863d0 100644 --- a/docs/api/search.md +++ b/docs/api/search.md @@ -146,7 +146,7 @@ Search RPC is the method to search vector(s) similar to the request vector. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. 
### Troubleshooting @@ -267,7 +267,7 @@ The vector with the same requested ID should be indexed into the `vald-agent` be | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -403,7 +403,7 @@ Each Search request and response are independent. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -539,7 +539,7 @@ Each SearchByID request and response are independent. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -684,7 +684,7 @@ Please be careful that the size of the request exceeds the limit. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -830,7 +830,7 @@ Please be careful that the size of the request exceeds the limit. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -946,7 +946,7 @@ LinearSearch RPC is the method to linear search vector(s) similar to the request | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -1064,7 +1064,7 @@ You will get a `NOT_FOUND` error if the vector isn't stored. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. 
### Troubleshooting @@ -1196,7 +1196,7 @@ Each LinearSearch request and response are independent. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -1328,7 +1328,7 @@ Each LinearSearchByID request and response are independent. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -1469,7 +1469,7 @@ Please be careful that the size of the request exceeds the limit. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -1611,7 +1611,7 @@ Please be careful that the size of the request exceeds the limit. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting diff --git a/docs/api/update.md b/docs/api/update.md index f1517b2a64..2fb68b05f7 100644 --- a/docs/api/update.md +++ b/docs/api/update.md @@ -102,7 +102,7 @@ Update RPC is the method to update a single vector. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -231,7 +231,7 @@ It's the recommended method to update the large amount of vectors. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -357,7 +357,7 @@ Please be careful that the size of the request exceeds the limit. 
| 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting diff --git a/docs/api/upsert.md b/docs/api/upsert.md index bd3128419d..9ed9f6572c 100644 --- a/docs/api/upsert.md +++ b/docs/api/upsert.md @@ -105,7 +105,7 @@ Upsert RPC is the method to update the inserted vector to a new single vector or | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -232,7 +232,7 @@ It’s the recommended method to upsert a large number of vectors. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting @@ -356,7 +356,7 @@ Please be careful that the size of the request exceeds the limit. | 10 | ABORTED | | 13 | INTERNAL | -Please refer to [Response Status Code](./status.md) for more details. +Please refer to [Response Status Code](../status.md) for more details. ### Troubleshooting diff --git a/docs/contributing/coding-style.md b/docs/contributing/coding-style.md index ffd67cef7c..c5bb690079 100644 --- a/docs/contributing/coding-style.md +++ b/docs/contributing/coding-style.md @@ -1407,4 +1407,4 @@ Since each package has its purpose, we decided to apply different strategies to For the rest of the `./pkg` packages, we decided to implement the unit test for the exported function only. -Please follow the [unit test guideline](./unit-test-guideline.md) for more details on how to implement good unit test. +Please follow the [unit test guideline](../unit-test-guideline.md) for more details on how to implement good unit test. 
diff --git a/docs/performance/continuous-benchmark.md b/docs/performance/continuous-benchmark.md index ef683b1d5d..b63e12d316 100644 --- a/docs/performance/continuous-benchmark.md +++ b/docs/performance/continuous-benchmark.md @@ -45,24 +45,24 @@ And, Benchmark Operator also applies it to the Kubernetes cluster based on `Vald **main properties** -| Name | mandatory | Description | type | sample | -| :------------------------- | :-------- | :-------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------- | :------------------------------------------------------------------------------------------- | -| target | \* | target Vald cluster | object | ref: [target](#target-prop) | -| dataset | \* | dataset information | object | ref: [dataset](#dataset-prop) | -| job_type | \* | execute job type | string enum: [insert, update, upsert, remove, search, getobject, exists] | search | -| repetition | | the number of job repetitions
default: `1` | integer | 1 | -| replica | | the number of job concurrent job executions
default: `1` | integer | 2 | -| rps | | designed request per sec to the target cluster
default: `1000` | integer | 1000 | -| concurrency_limit | | goroutine count limit for rps adjustment
default: `200` | integer | 20 | -| ttl_seconds_after_finished | | time until deletion of Pod after job end
default: `600` | integer | 120 | -| insert_config | | request config for insert job | object | ref: [config](#insert-cfg-props) | -| update_config | | request config for update job | object | ref: [config](#update-cfg-props) | -| upsert_config | | request config for upsert job | object | ref: [config](#upsert-cfg-props) | -| search_config | | request config for search job | object | ref: [config](#search-cfg-props) | -| remove_config | | request config for remove job | object | ref: [config](#remove-cfg-props) | -| object_config | | request config for object job | object | ref: [config](#object-cfg-props) | -| client_config | | gRPC client config for running benchmark job
Tune if can not getting the expected performance with default config. | object | ref: [defaults.grpc](https://github.com/vdaas/vald/blob/main/charts/vald/README.md) | -| server_config | | server config for benchmark job pod
Tune if can not getting the expected performance with default config. | object | ref: [defaults.server_config](https://github.com/vdaas/vald/blob/main/charts/vald/README.md) | +| Name | mandatory | Description | type | sample | +| :------------------------- | :-------- | :-------------------------------------------------------------------------------------------------------------------- | :----------------------------------------------------------------------- | :--------------------------------------------------------------------------------------------- | +| target | \* | target Vald cluster | object | ref: [target](#target-prop) | +| dataset | \* | dataset information | object | ref: [dataset](#dataset-prop) | +| job_type | \* | execute job type | string enum: [insert, update, upsert, remove, search, getobject, exists] | search | +| repetition | | the number of job repetitions
default: `1` | integer | 1 | +| replica | | the number of concurrent job executions
default: `1` | integer | 2 | +| rps | | designed request per sec to the target cluster
default: `1000` | integer | 1000 | +| concurrency_limit | | goroutine count limit for rps adjustment
default: `200` | integer | 20 | +| ttl_seconds_after_finished | | time until deletion of Pod after job end
default: `600` | integer | 120 | +| insert_config | | request config for insert job | object | ref: [config](#insert-cfg-props) | +| update_config | | request config for update job | object | ref: [config](#update-cfg-props) | +| upsert_config | | request config for upsert job | object | ref: [config](#upsert-cfg-props) | +| search_config | | request config for search job | object | ref: [config](#search-cfg-props) | +| remove_config | | request config for remove job | object | ref: [config](#remove-cfg-props) | +| object_config | | request config for object job | object | ref: [config](#object-cfg-props) | +| client_config | | gRPC client config for running benchmark job
Tune if you cannot get the expected performance with the default config. | object | ref: [defaults.grpc](https://github.com/vdaas/vald/blob/main/charts/vald/values.yaml) | +| server_config | | server config for benchmark job pod
Tune if you cannot get the expected performance with the default config. | object | ref: [defaults.server_config](https://github.com/vdaas/vald/blob/main/charts/vald/values.yaml) | diff --git a/docs/troubleshooting/client-side.md b/docs/troubleshooting/client-side.md index 6efc5f1ff3..a487c81383 100644 --- a/docs/troubleshooting/client-side.md +++ b/docs/troubleshooting/client-side.md @@ -55,4 +55,4 @@ Please check your CPU information. - [Provisioning Troubleshooting](../troubleshooting/provisioning.md) - [API Status](../api/status.md) -- [FAQ](../support/FAQ.md) +- [FAQ](/docs/support/faq) diff --git a/docs/tutorial/get-started-with-faiss-agent.md b/docs/tutorial/get-started-with-faiss-agent.md index 3b8dec1ad8..2abad816da 100644 --- a/docs/tutorial/get-started-with-faiss-agent.md +++ b/docs/tutorial/get-started-with-faiss-agent.md @@ -1,6 +1,6 @@ # Get Started -This tutorial is for those who have already completed [Get Started](https://github.com/vdaas/vald/blob/main/docs/tutorial/get-started.md). +This tutorial is for those who have already completed [Get Started](../tutorial/get-started.md). Please refer to Prepare the Kubernetes Cluster and others there. ## Deploy Vald on Kubernetes Cluster diff --git a/docs/tutorial/vald-multicluster-on-k8s.md b/docs/tutorial/vald-multicluster-on-k8s.md index 6cdd344bc6..15d1498a16 100644 --- a/docs/tutorial/vald-multicluster-on-k8s.md +++ b/docs/tutorial/vald-multicluster-on-k8s.md @@ -88,7 +88,7 @@ In this section, you will deploy three Vald clusters consisting of `vald-agent-n git clone https://github.com/vdaas/vald.git && cd vald ``` -2. Deploy on the `vald-01` Namespace using [dev-vald-01.yaml](https://github.com/vdaas/vald/blob/feature/mirror-gateway-definition/charts/vald/values/multi-vald/dev-vald-01.yaml) and [values.yaml](https://github.com/vdaas/vald/blob/main/example/helm/values.yaml) +2. 
Deploy on the `vald-01` Namespace using [dev-vald-01.yaml](https://github.com/vdaas/vald/blob/main/charts/vald/values/multi-vald/dev-vald-01.yaml) and [values.yaml](https://github.com/vdaas/vald/blob/main/example/helm/values.yaml) ```bash helm install vald-cluster-01 charts/vald \ @@ -97,7 +97,7 @@ In this section, you will deploy three Vald clusters consisting of `vald-agent-n -n vald-01 ``` -3. Deploy on the `vald-02` Namespace using [dev-vald-02.yaml](https://github.com/vdaas/vald/blob/feature/mirror-gateway-definition/charts/vald/values/multi-vald/dev-vald-02.yaml) and [values.yaml](https://github.com/vdaas/vald/blob/main/example/helm/values.yaml) +3. Deploy on the `vald-02` Namespace using [dev-vald-02.yaml](https://github.com/vdaas/vald/blob/main/charts/vald/values/multi-vald/dev-vald-02.yaml) and [values.yaml](https://github.com/vdaas/vald/blob/main/example/helm/values.yaml) ```bash helm install vald-cluster-02 charts/vald \ @@ -106,7 +106,7 @@ In this section, you will deploy three Vald clusters consisting of `vald-agent-n -n vald-02 ``` -4. Deploy on the `vald-03` Namespace using [dev-vald-03.yaml](https://github.com/vdaas/vald/blob/feature/mirror-gateway-definition/charts/vald/values/multi-vald/dev-vald-03.yaml) and [values.yaml](https://github.com/vdaas/vald/blob/main/example/helm/values.yaml) +4. Deploy on the `vald-03` Namespace using [dev-vald-03.yaml](https://github.com/vdaas/vald/blob/main/charts/vald/values/multi-vald/dev-vald-03.yaml) and [values.yaml](https://github.com/vdaas/vald/blob/main/example/helm/values.yaml) ```bash helm install vald-cluster-03 charts/vald \ @@ -194,7 +194,7 @@ It requires applying the `ValdMirrorTarget` Custom Resource to the one Namespace When applied successfully, the destination information is automatically created on other Namespaces when interconnected with each `vald-mirror-gateway`. 
-This tutorial will deploy the [ValdMirrorTarget](https://github.com/vdaas/vald/tree/main/charts/vald/values/mirror-target.yaml) Custom Resource to the `vald-03` Namespace with the following command. +This tutorial will deploy the [ValdMirrorTarget](https://github.com/vdaas/vald/blob/main/charts/vald/values/multi-vald/mirror-target.yaml) Custom Resource to the `vald-03` Namespace with the following command. ```bash kubectl apply -f ./charts/vald/values/multi-vald/mirror-target.yaml -n vald-03 @@ -255,7 +255,7 @@ If you are interested, please refer to [SDKs](https://vald.vdaas.org/docs/user- 3. Run Example - We use [example/client/mirror/main.go](https://github.com/vdaas/vald/blob/feature/mirror-gateway-example/example/client/mirror/main.go) to run the example. + We use [example/client/mirror/main.go](https://github.com/vdaas/vald/blob/main/example/client/mirror/main.go) to run the example. This example will insert and index 400 vectors into the Vald cluster from the Fashion-MNIST dataset via [gRPC](https://grpc.io/). And then, after waiting for indexing, it will request to search the nearest vector 10 times to all Vald clusters. You will get the 10 nearest neighbor vectors for each search query. diff --git a/docs/user-guides/cluster-role-binding.md b/docs/user-guides/cluster-role-binding.md index 36404ec157..af66e6f6b3 100644 --- a/docs/user-guides/cluster-role-binding.md +++ b/docs/user-guides/cluster-role-binding.md @@ -141,7 +141,7 @@ In this section, we will describe how to configure it and how to customize these ### Cluster role configuration for Vald Mirror Gateway -By looking at the [cluster role configuration](https://github.com/vdaas/vald/blob/main/k8s/gatewat/mirror/clusterrole.yaml), the access right of the following resources are granted to the cluster role `gateway-mirror`. 
+By looking at the [cluster role configuration](https://github.com/vdaas/vald/blob/main/k8s/gateway/mirror/clusterrole.yaml), the access right of the following resources are granted to the cluster role `gateway-mirror`. ```yaml apiVersion: rbac.authorization.k8s.io/v1 diff --git a/k8s/gateway/gateway/mirror/clusterrole.yaml b/k8s/gateway/gateway/mirror/clusterrole.yaml new file mode 100644 index 0000000000..c3e6bf4c1a --- /dev/null +++ b/k8s/gateway/gateway/mirror/clusterrole.yaml @@ -0,0 +1,55 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: gateway-mirror + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror +rules: + - apiGroups: + - vald.vdaas.org + resources: + - valdmirrortargets + verbs: + - create + - update + - delete + - get + - list + - watch + - patch + - apiGroups: + - vald.vdaas.org + resources: + - valdmirrortargets/status + verbs: + - create + - update + - get + - list + - patch + - apiGroups: + - vald.vdaas.org + resources: + - valdmirrortargets/finalizers + verbs: + - update diff --git a/k8s/gateway/gateway/mirror/clusterrolebinding.yaml b/k8s/gateway/gateway/mirror/clusterrolebinding.yaml new file mode 100644 index 0000000000..0795540838 --- /dev/null +++ b/k8s/gateway/gateway/mirror/clusterrolebinding.yaml @@ -0,0 +1,34 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: gateway-mirror + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: gateway-mirror +subjects: + - kind: ServiceAccount + name: gateway-mirror + namespace: default diff --git a/k8s/gateway/gateway/mirror/configmap.yaml b/k8s/gateway/gateway/mirror/configmap.yaml new file mode 100644 index 0000000000..a060d7020e --- /dev/null +++ b/k8s/gateway/gateway/mirror/configmap.yaml @@ -0,0 +1,28 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: v1 +kind: ConfigMap +metadata: + name: vald-mirror-gateway-config + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror +data: + config.yaml: "---\nversion: v0.0.0\ntime_zone: UTC\nlogging:\n format: raw\n level: debug\n logger: glg\nserver_config:\n servers:\n - name: grpc\n host: 0.0.0.0\n port: 8081\n grpc:\n bidirectional_stream_concurrency: 20\n connection_timeout: \"\"\n enable_admin: true\n enable_reflection: true\n header_table_size: 0\n initial_conn_window_size: 2097152\n initial_window_size: 1048576\n interceptors:\n - RecoverInterceptor\n keepalive:\n max_conn_age: \"\"\n max_conn_age_grace: \"\"\n max_conn_idle: \"\"\n min_time: 10m\n permit_without_stream: false\n time: 3h\n timeout: 60s\n max_header_list_size: 0\n max_receive_message_size: 0\n max_send_message_size: 0\n read_buffer_size: 0\n write_buffer_size: 0\n mode: GRPC\n network: tcp\n probe_wait_time: 3s\n restart: true\n socket_option:\n ip_recover_destination_addr: false\n ip_transparent: false\n reuse_addr: true\n reuse_port: true\n tcp_cork: false\n tcp_defer_accept: false\n tcp_fast_open: false\n tcp_no_delay: false\n tcp_quick_ack: false\n socket_path: \"\"\n health_check_servers:\n - name: liveness\n host: 0.0.0.0\n port: 3000\n http:\n handler_timeout: \"\"\n http2:\n enabled: false\n handler_limit: 0\n max_concurrent_streams: 0\n max_decoder_header_table_size: 4096\n max_encoder_header_table_size: 4096\n max_read_frame_size: 0\n max_upload_buffer_per_connection: 0\n max_upload_buffer_per_stream: 0\n permit_prohibited_cipher_suites: true\n idle_timeout: \"\"\n read_header_timeout: \"\"\n read_timeout: \"\"\n shutdown_duration: 5s\n write_timeout: \"\"\n mode: REST\n network: tcp\n probe_wait_time: 3s\n restart: true\n socket_option:\n ip_recover_destination_addr: false\n ip_transparent: false\n 
reuse_addr: true\n reuse_port: true\n tcp_cork: false\n tcp_defer_accept: false\n tcp_fast_open: true\n tcp_no_delay: true\n tcp_quick_ack: true\n socket_path: \"\"\n - name: readiness\n host: 0.0.0.0\n port: 3001\n http:\n handler_timeout: \"\"\n http2:\n enabled: false\n handler_limit: 0\n max_concurrent_streams: 0\n max_decoder_header_table_size: 4096\n max_encoder_header_table_size: 4096\n max_read_frame_size: 0\n max_upload_buffer_per_connection: 0\n max_upload_buffer_per_stream: 0\n permit_prohibited_cipher_suites: true\n idle_timeout: \"\"\n read_header_timeout: \"\"\n read_timeout: \"\"\n shutdown_duration: 0s\n write_timeout: \"\"\n mode: REST\n network: tcp\n probe_wait_time: 3s\n restart: true\n socket_option:\n ip_recover_destination_addr: false\n ip_transparent: false\n reuse_addr: true\n reuse_port: true\n tcp_cork: false\n tcp_defer_accept: false\n tcp_fast_open: true\n tcp_no_delay: true\n tcp_quick_ack: true\n socket_path: \"\"\n metrics_servers:\n - name: pprof\n host: 0.0.0.0\n port: 6060\n http:\n handler_timeout: 5s\n http2:\n enabled: false\n handler_limit: 0\n max_concurrent_streams: 0\n max_decoder_header_table_size: 4096\n max_encoder_header_table_size: 4096\n max_read_frame_size: 0\n max_upload_buffer_per_connection: 0\n max_upload_buffer_per_stream: 0\n permit_prohibited_cipher_suites: true\n idle_timeout: 2s\n read_header_timeout: 1s\n read_timeout: 1s\n shutdown_duration: 5s\n write_timeout: 1m\n mode: REST\n network: tcp\n probe_wait_time: 3s\n restart: true\n socket_option:\n ip_recover_destination_addr: false\n ip_transparent: false\n reuse_addr: true\n reuse_port: true\n tcp_cork: true\n tcp_defer_accept: false\n tcp_fast_open: false\n tcp_no_delay: false\n tcp_quick_ack: false\n socket_path: \"\"\n startup_strategy:\n - liveness\n - pprof\n - grpc\n - readiness\n shutdown_strategy:\n - readiness\n - grpc\n - pprof\n - liveness\n full_shutdown_duration: 600s\n tls:\n ca: /path/to/ca\n cert: /path/to/cert\n enabled: false\n 
insecure_skip_verify: false\n key: /path/to/key\nobservability:\n enabled: false\n otlp:\n collector_endpoint: \"\"\n trace_batch_timeout: \"1s\"\n trace_export_timeout: \"1m\"\n trace_max_export_batch_size: 1024\n trace_max_queue_size: 256\n metrics_export_interval: \"1s\"\n metrics_export_timeout: \"1m\"\n attribute:\n namespace: \"_MY_POD_NAMESPACE_\"\n pod_name: \"_MY_POD_NAME_\"\n node_name: \"_MY_NODE_NAME_\"\n service_name: \"vald-mirror-gateway\"\n metrics:\n enable_cgo: true\n enable_goroutine: true\n enable_memory: true\n enable_version_info: true\n version_info_labels:\n - vald_version\n - server_name\n - git_commit\n - build_time\n - go_version\n - go_os\n - go_arch\n - algorithm_info\n trace:\n enabled: false\ngateway:\n pod_name: _MY_POD_NAME_\n register_duration: 1s\n namespace: _MY_POD_NAMESPACE_\n discovery_duration: 1s\n colocation: dc1\n group: \n net:\n dialer:\n dual_stack_enabled: false\n keepalive: 10m\n timeout: 30s\n dns:\n cache_enabled: true\n cache_expiration: 24h\n refresh_duration: 5m\n socket_option:\n ip_recover_destination_addr: false\n ip_transparent: false\n reuse_addr: true\n reuse_port: true\n tcp_cork: false\n tcp_defer_accept: true\n tcp_fast_open: true\n tcp_no_delay: true\n tcp_quick_ack: true\n tls:\n ca: /path/to/ca\n cert: /path/to/cert\n enabled: false\n insecure_skip_verify: false\n key: /path/to/key\n client:\n addrs:\n - vald-lb-gateway.default.svc.cluster.local:8081\n health_check_duration: \"1s\"\n connection_pool:\n enable_dns_resolver: true\n enable_rebalance: true\n old_conn_close_duration: 2m\n rebalance_duration: 30m\n size: 3\n backoff:\n backoff_factor: 1.1\n backoff_time_limit: 5s\n enable_error_log: true\n initial_duration: 5ms\n jitter_limit: 100ms\n maximum_duration: 5s\n retry_count: 100\n circuit_breaker:\n closed_error_rate: 0.7\n closed_refresh_timeout: 10s\n half_open_error_rate: 0.5\n min_samples: 1000\n open_timeout: 1s\n call_option:\n max_recv_msg_size: 0\n max_retry_rpc_buffer_size: 0\n 
max_send_msg_size: 0\n wait_for_ready: true\n dial_option:\n backoff_base_delay: 1s\n backoff_jitter: 0.2\n backoff_max_delay: 120s\n backoff_multiplier: 1.6\n enable_backoff: false\n initial_connection_window_size: 2097152\n initial_window_size: 1048576\n insecure: true\n interceptors: []\n keepalive:\n permit_without_stream: false\n time: \"\"\n timeout: 30s\n max_msg_size: 0\n min_connection_timeout: 20s\n net:\n dialer:\n dual_stack_enabled: true\n keepalive: \"\"\n timeout: \"\"\n dns:\n cache_enabled: true\n cache_expiration: 1h\n refresh_duration: 30m\n socket_option:\n ip_recover_destination_addr: false\n ip_transparent: false\n reuse_addr: true\n reuse_port: true\n tcp_cork: false\n tcp_defer_accept: false\n tcp_fast_open: false\n tcp_no_delay: false\n tcp_quick_ack: false\n tls:\n ca: /path/to/ca\n cert: /path/to/cert\n enabled: false\n insecure_skip_verify: false\n key: /path/to/key\n read_buffer_size: 0\n timeout: \"\"\n write_buffer_size: 0\n tls:\n ca: /path/to/ca\n cert: /path/to/cert\n enabled: false\n insecure_skip_verify: false\n key: /path/to/key\n self_mirror_addr: vald-mirror-gateway.default.svc.cluster.local:8081\n gateway_addr: vald-lb-gateway.default.svc.cluster.local:8081\n" diff --git a/k8s/gateway/gateway/mirror/deployment.yaml b/k8s/gateway/gateway/mirror/deployment.yaml new file mode 100644 index 0000000000..a1674d0acd --- /dev/null +++ b/k8s/gateway/gateway/mirror/deployment.yaml @@ -0,0 +1,184 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vald-mirror-gateway + labels: + app: vald-mirror-gateway + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror +spec: + progressDeadlineSeconds: 600 + revisionHistoryLimit: 2 + selector: + matchLabels: + app: vald-mirror-gateway + strategy: + rollingUpdate: + maxSurge: 25% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + creationTimestamp: null + labels: + app: vald-mirror-gateway + app.kubernetes.io/name: vald + app.kubernetes.io/instance: release-name + app.kubernetes.io/component: gateway-mirror + annotations: + checksum/configmap: 8546d68c1063b706eda3ba96b346cf7e76c59d8010692426f21b802aa1f35a2e + pyroscope.io/scrape: "true" + pyroscope.io/application-name: vald-mirror-gateway + pyroscope.io/profile-cpu-enabled: "true" + pyroscope.io/profile-mem-enabled: "true" + pyroscope.io/port: "6060" + spec: + initContainers: + - name: wait-for-gateway-lb + image: busybox:stable + command: + - /bin/sh + - -e + - -c + - | + until [ "$(wget --server-response --spider --quiet http://vald-lb-gateway.default.svc.cluster.local:3001/readiness 2>&1 | awk 'NR==1{print $2}')" == "200" ]; do + echo "waiting for gateway-lb to be ready..." 
+ sleep 2; + done + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: [] + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: [] + requiredDuringSchedulingIgnoredDuringExecution: [] + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - vald-mirror-gateway + topologyKey: kubernetes.io/hostname + weight: 100 + requiredDuringSchedulingIgnoredDuringExecution: [] + containers: + - name: vald-mirror-gateway + image: "vdaas/vald-mirror-gateway:nightly" + imagePullPolicy: Always + livenessProbe: + failureThreshold: 2 + httpGet: + path: /liveness + port: liveness + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 3 + successThreshold: 1 + timeoutSeconds: 2 + readinessProbe: + failureThreshold: 2 + httpGet: + path: /readiness + port: readiness + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 3 + successThreshold: 1 + timeoutSeconds: 2 + startupProbe: + failureThreshold: 30 + httpGet: + path: /liveness + port: liveness + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 2 + ports: + - name: liveness + protocol: TCP + containerPort: 3000 + - name: readiness + protocol: TCP + containerPort: 3001 + - name: grpc + protocol: TCP + containerPort: 8081 + - name: pprof + protocol: TCP + containerPort: 6060 + resources: + limits: + cpu: 2000m + memory: 700Mi + requests: + cpu: 200m + memory: 150Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + 
fieldRef: + fieldPath: metadata.namespace + volumeMounts: + - name: vald-mirror-gateway-config + mountPath: /etc/server/ + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + serviceAccountName: gateway-mirror + securityContext: + fsGroup: 65532 + fsGroupChangePolicy: OnRootMismatch + runAsGroup: 65532 + runAsNonRoot: true + runAsUser: 65532 + terminationGracePeriodSeconds: 30 + volumes: + - name: vald-mirror-gateway-config + configMap: + defaultMode: 420 + name: vald-mirror-gateway-config + priorityClassName: default-vald-mirror-gateway-priority +status: diff --git a/k8s/gateway/gateway/mirror/hpa.yaml b/k8s/gateway/gateway/mirror/hpa.yaml new file mode 100644 index 0000000000..d3d4d03b69 --- /dev/null +++ b/k8s/gateway/gateway/mirror/hpa.yaml @@ -0,0 +1,35 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: autoscaling/v1 +kind: HorizontalPodAutoscaler +metadata: + name: vald-mirror-gateway + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror +spec: + maxReplicas: 9 + minReplicas: 3 + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: vald-mirror-gateway + targetCPUUtilizationPercentage: 80 +status: diff --git a/k8s/gateway/gateway/mirror/pdb.yaml b/k8s/gateway/gateway/mirror/pdb.yaml new file mode 100644 index 0000000000..37f81f96ee --- /dev/null +++ b/k8s/gateway/gateway/mirror/pdb.yaml @@ -0,0 +1,31 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: vald-mirror-gateway + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror +spec: + maxUnavailable: 50% + selector: + matchLabels: + app: vald-mirror-gateway diff --git a/k8s/gateway/gateway/mirror/priorityclass.yaml b/k8s/gateway/gateway/mirror/priorityclass.yaml new file mode 100644 index 0000000000..a3be44bc0a --- /dev/null +++ b/k8s/gateway/gateway/mirror/priorityclass.yaml @@ -0,0 +1,29 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: scheduling.k8s.io/v1 +kind: PriorityClass +metadata: + name: default-vald-mirror-gateway-priority + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror +value: 1e+06 +globalDefault: false +description: "A priority class for Vald mirror gateway." 
diff --git a/k8s/gateway/gateway/mirror/serviceaccount.yaml b/k8s/gateway/gateway/mirror/serviceaccount.yaml new file mode 100644 index 0000000000..ac49327d35 --- /dev/null +++ b/k8s/gateway/gateway/mirror/serviceaccount.yaml @@ -0,0 +1,26 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +kind: ServiceAccount +metadata: + name: gateway-mirror + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror diff --git a/k8s/gateway/gateway/mirror/svc.yaml b/k8s/gateway/gateway/mirror/svc.yaml new file mode 100644 index 0000000000..9718722b17 --- /dev/null +++ b/k8s/gateway/gateway/mirror/svc.yaml @@ -0,0 +1,45 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +apiVersion: v1 +kind: Service +metadata: + name: vald-mirror-gateway + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.13 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.13 + app.kubernetes.io/component: gateway-mirror +spec: + ports: + - name: grpc + port: 8081 + targetPort: 8081 + protocol: TCP + - name: readiness + port: 3001 + targetPort: 3001 + protocol: TCP + - name: pprof + port: 6060 + targetPort: 6060 + protocol: TCP + selector: + app.kubernetes.io/name: vald + app.kubernetes.io/component: gateway-mirror + clusterIP: None + type: ClusterIP From aa4bbc093f7193a0620cc5945be8a285e111e88a Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 09:43:49 +0900 Subject: [PATCH 4/5] docs: add smorihira as a contributor for tool, and code (#2601) * docs: update README.md [skip ci] * docs: update .all-contributorsrc [skip ci] * style: format code with Gofumpt and Prettier This commit fixes the style issues introduced in da46b1a according to the output from Gofumpt and Prettier. 
Details: https://github.com/vdaas/vald/pull/2601 --------- Co-authored-by: allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> Co-authored-by: Kiichiro YUKAWA Co-authored-by: deepsource-autofix[bot] <62050782+deepsource-autofix[bot]@users.noreply.github.com> --- .all-contributorsrc | 10 ++++++++++ README.md | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/.all-contributorsrc b/.all-contributorsrc index a750612cb3..f10efcf65c 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -194,6 +194,16 @@ "contributions": [ "doc" ] + }, + { + "login": "smorihira", + "name": "Shunya Morihira (森平 隼矢)", + "avatar_url": "https://avatars.githubusercontent.com/u/105629359?v=4", + "profile": "https://github.com/smorihira", + "contributions": [ + "tool", + "code" + ] } ], "contributorsPerLine": 7, diff --git a/README.md b/README.md index a03458bf5b..94874a6aa8 100755 --- a/README.md +++ b/README.md @@ -265,7 +265,7 @@ make init -[![All Contributors](https://img.shields.io/badge/all_contributors-18-orange.svg?style=flat-square)](#contributors) +[![All Contributors](https://img.shields.io/badge/all_contributors-19-orange.svg?style=flat-square)](#contributors) @@ -299,6 +299,7 @@ Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/d Yusuke Kadowaki
Yusuke Kadowaki

💻 ⚠️ aknishid
aknishid

💻 🚧 📖 Hrichik Mazumder
Hrichik Mazumder

📖 + Shunya Morihira (森平 隼矢)
Shunya Morihira (森平 隼矢)

🔧 💻 From 0e12419069138b6b77158d42acfd73f127cd8d7d Mon Sep 17 00:00:00 2001 From: Kosuke Morimoto Date: Thu, 12 Sep 2024 14:27:07 +0900 Subject: [PATCH 5/5] add check context cancel (#2596) * add check context cancel Signed-off-by: Kosuke Morimoto * fix Signed-off-by: Kosuke Morimoto * fix Signed-off-by: Kosuke Morimoto * fix Signed-off-by: Kosuke Morimoto * fix Signed-off-by: Kosuke Morimoto * fix Signed-off-by: Kosuke Morimoto * fix Signed-off-by: Kosuke Morimoto * fix Signed-off-by: Kosuke Morimoto * update deps Signed-off-by: Kosuke Morimoto --------- Signed-off-by: Kosuke Morimoto Co-authored-by: Yusuke Kato --- internal/backoff/backoff.go | 16 ++++++++++++++++ versions/GO_VERSION | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/internal/backoff/backoff.go b/internal/backoff/backoff.go index 04c1355739..0c30f6571e 100644 --- a/internal/backoff/backoff.go +++ b/internal/backoff/backoff.go @@ -186,6 +186,22 @@ func (b *backoff) Do( dur *= b.backoffFactor jdur = b.addJitter(dur) } + if cnt >= b.maxRetryCount-1 { + select { + case <-dctx.Done(): + switch dctx.Err() { + case context.DeadlineExceeded: + log.Debugf("[backoff]\tfor: "+name+",\tDeadline Exceeded\terror: %v", err.Error()) + return nil, errors.ErrBackoffTimeout(err) + case context.Canceled: + log.Debugf("[backoff]\tfor: "+name+",\tCanceled\terror: %v", err.Error()) + return nil, err + default: + return nil, errors.Join(dctx.Err(), err) + } + default: + } + } } } } diff --git a/versions/GO_VERSION b/versions/GO_VERSION index a6c2798a48..49e0a31d49 100644 --- a/versions/GO_VERSION +++ b/versions/GO_VERSION @@ -1 +1 @@ -1.23.0 +1.23.1