diff --git a/.github/workflows/dockers-index-exportation-image.yaml b/.github/workflows/dockers-index-exportation-image.yaml new file mode 100644 index 0000000000..5e85d2bae8 --- /dev/null +++ b/.github/workflows/dockers-index-exportation-image.yaml @@ -0,0 +1,247 @@ +# +# Copyright (C) 2019-2025 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# DO_NOT_EDIT this workflow file is generated by https://github.com/vdaas/vald/blob/main/hack/docker/gen/main.go + + +name: 'Build docker image: index-exportation' +on: + push: + branches: + - main + - release/v*.* + - '!release/v*.*.*' + tags: + - '*.*.*' + - '*.*.*-*' + - v*.*.* + - v*.*.*-* + pull_request: + paths: + - '!**/*_mock.go' + - '!**/*_test.go' + - .github/actions/docker-build/action.yaml + - .github/workflows/_docker-image.yaml + - .github/workflows/dockers-index-exportation-image.yaml + - Makefile + - Makefile.d/** + - apis/grpc/v1/payload/*.go + - apis/grpc/v1/rpc/errdetails/*.go + - apis/grpc/v1/vald/*.go + - apis/proto/** + - cmd/index/job/exportation/*.go + - dockers/index/job/exportation/Dockerfile + - go.mod + - go.sum + - hack/docker/gen/main.go + - internal/backoff/*.go + - internal/cache/*.go + - internal/cache/cacher/*.go + - internal/cache/gache/*.go + - internal/circuitbreaker/*.go + - internal/client/v1/client/vald/*.go + - internal/config/*.go + - internal/conv/*.go + - internal/db/kvs/pogreb/*.go + - internal/db/kvs/redis/*.go + - internal/db/nosql/cassandra/*.go + - 
internal/db/rdb/mysql/*.go + - internal/db/rdb/mysql/dbr/*.go + - internal/encoding/json/*.go + - internal/errors/*.go + - internal/file/*.go + - internal/info/*.go + - internal/io/*.go + - internal/k8s/*.go + - internal/log/*.go + - internal/log/format/*.go + - internal/log/glg/*.go + - internal/log/level/*.go + - internal/log/logger/*.go + - internal/log/nop/*.go + - internal/log/retry/*.go + - internal/log/zap/*.go + - internal/net/*.go + - internal/net/control/*.go + - internal/net/grpc/*.go + - internal/net/grpc/admin/*.go + - internal/net/grpc/codes/*.go + - internal/net/grpc/credentials/*.go + - internal/net/grpc/errdetails/*.go + - internal/net/grpc/health/*.go + - internal/net/grpc/interceptor/client/metric/*.go + - internal/net/grpc/interceptor/client/trace/*.go + - internal/net/grpc/interceptor/server/logging/*.go + - internal/net/grpc/interceptor/server/metric/*.go + - internal/net/grpc/interceptor/server/recover/*.go + - internal/net/grpc/interceptor/server/trace/*.go + - internal/net/grpc/keepalive/*.go + - internal/net/grpc/logger/*.go + - internal/net/grpc/pool/*.go + - internal/net/grpc/proto/*.go + - internal/net/grpc/reflection/*.go + - internal/net/grpc/status/*.go + - internal/net/grpc/types/*.go + - internal/net/http/dump/*.go + - internal/net/http/json/*.go + - internal/net/http/metrics/*.go + - internal/net/http/middleware/*.go + - internal/net/http/rest/*.go + - internal/net/http/routing/*.go + - internal/net/quic/*.go + - internal/observability/*.go + - internal/observability/attribute/*.go + - internal/observability/exporter/*.go + - internal/observability/exporter/otlp/*.go + - internal/observability/metrics/*.go + - internal/observability/metrics/grpc/*.go + - internal/observability/metrics/mem/*.go + - internal/observability/metrics/runtime/cgo/*.go + - internal/observability/metrics/runtime/goroutine/*.go + - internal/observability/metrics/version/*.go + - internal/observability/trace/*.go + - internal/os/*.go + - internal/params/*.go 
+ - internal/rand/*.go + - internal/runner/*.go + - internal/safety/*.go + - internal/servers/*.go + - internal/servers/server/*.go + - internal/servers/starter/*.go + - internal/strings/*.go + - internal/sync/*.go + - internal/sync/errgroup/*.go + - internal/sync/semaphore/*.go + - internal/sync/singleflight/*.go + - internal/timeutil/*.go + - internal/timeutil/location/*.go + - internal/tls/*.go + - internal/version/*.go + - pkg/index/job/exportation/config/*.go + - pkg/index/job/exportation/service/*.go + - pkg/index/job/exportation/usecase/*.go + - versions/GO_VERSION + pull_request_target: + paths: + - '!**/*_mock.go' + - '!**/*_test.go' + - .github/actions/docker-build/action.yaml + - .github/workflows/_docker-image.yaml + - .github/workflows/dockers-index-exportation-image.yaml + - Makefile + - Makefile.d/** + - apis/grpc/v1/payload/*.go + - apis/grpc/v1/rpc/errdetails/*.go + - apis/grpc/v1/vald/*.go + - apis/proto/** + - cmd/index/job/exportation/*.go + - dockers/index/job/exportation/Dockerfile + - go.mod + - go.sum + - hack/docker/gen/main.go + - internal/backoff/*.go + - internal/cache/*.go + - internal/cache/cacher/*.go + - internal/cache/gache/*.go + - internal/circuitbreaker/*.go + - internal/client/v1/client/vald/*.go + - internal/config/*.go + - internal/conv/*.go + - internal/db/kvs/pogreb/*.go + - internal/db/kvs/redis/*.go + - internal/db/nosql/cassandra/*.go + - internal/db/rdb/mysql/*.go + - internal/db/rdb/mysql/dbr/*.go + - internal/encoding/json/*.go + - internal/errors/*.go + - internal/file/*.go + - internal/info/*.go + - internal/io/*.go + - internal/k8s/*.go + - internal/log/*.go + - internal/log/format/*.go + - internal/log/glg/*.go + - internal/log/level/*.go + - internal/log/logger/*.go + - internal/log/nop/*.go + - internal/log/retry/*.go + - internal/log/zap/*.go + - internal/net/*.go + - internal/net/control/*.go + - internal/net/grpc/*.go + - internal/net/grpc/admin/*.go + - internal/net/grpc/codes/*.go + - 
internal/net/grpc/credentials/*.go + - internal/net/grpc/errdetails/*.go + - internal/net/grpc/health/*.go + - internal/net/grpc/interceptor/client/metric/*.go + - internal/net/grpc/interceptor/client/trace/*.go + - internal/net/grpc/interceptor/server/logging/*.go + - internal/net/grpc/interceptor/server/metric/*.go + - internal/net/grpc/interceptor/server/recover/*.go + - internal/net/grpc/interceptor/server/trace/*.go + - internal/net/grpc/keepalive/*.go + - internal/net/grpc/logger/*.go + - internal/net/grpc/pool/*.go + - internal/net/grpc/proto/*.go + - internal/net/grpc/reflection/*.go + - internal/net/grpc/status/*.go + - internal/net/grpc/types/*.go + - internal/net/http/dump/*.go + - internal/net/http/json/*.go + - internal/net/http/metrics/*.go + - internal/net/http/middleware/*.go + - internal/net/http/rest/*.go + - internal/net/http/routing/*.go + - internal/net/quic/*.go + - internal/observability/*.go + - internal/observability/attribute/*.go + - internal/observability/exporter/*.go + - internal/observability/exporter/otlp/*.go + - internal/observability/metrics/*.go + - internal/observability/metrics/grpc/*.go + - internal/observability/metrics/mem/*.go + - internal/observability/metrics/runtime/cgo/*.go + - internal/observability/metrics/runtime/goroutine/*.go + - internal/observability/metrics/version/*.go + - internal/observability/trace/*.go + - internal/os/*.go + - internal/params/*.go + - internal/rand/*.go + - internal/runner/*.go + - internal/safety/*.go + - internal/servers/*.go + - internal/servers/server/*.go + - internal/servers/starter/*.go + - internal/strings/*.go + - internal/sync/*.go + - internal/sync/errgroup/*.go + - internal/sync/semaphore/*.go + - internal/sync/singleflight/*.go + - internal/timeutil/*.go + - internal/timeutil/location/*.go + - internal/tls/*.go + - internal/version/*.go + - pkg/index/job/exportation/config/*.go + - pkg/index/job/exportation/service/*.go + - pkg/index/job/exportation/usecase/*.go + - 
versions/GO_VERSION +jobs: + build: + uses: ./.github/workflows/_docker-image.yaml + with: + target: index-exportation + secrets: inherit diff --git a/Makefile b/Makefile index 73645e3120..f0e6083aea 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,7 @@ HELM_OPERATOR_IMAGE = $(NAME)-helm-operator INDEX_CORRECTION_IMAGE = $(NAME)-index-correction INDEX_CREATION_IMAGE = $(NAME)-index-creation INDEX_DELETION_IMAGE = $(NAME)-index-deletion +INDEX_EXPORTATION_IMAGE = $(NAME)-index-exportation INDEX_OPERATOR_IMAGE = $(NAME)-index-operator INDEX_SAVE_IMAGE = $(NAME)-index-save LB_GATEWAY_IMAGE = $(NAME)-lb-gateway diff --git a/Makefile.d/build.mk b/Makefile.d/build.mk index e5e175c63c..15d9c6e9bd 100644 --- a/Makefile.d/build.mk +++ b/Makefile.d/build.mk @@ -25,6 +25,7 @@ binary/build: \ cmd/index/job/correction/index-correction \ cmd/index/job/creation/index-creation \ cmd/index/job/deletion/index-deletion \ + cmd/index/job/exportation/index-exportation \ cmd/index/job/readreplica/rotate/readreplica-rotate \ cmd/index/job/save/index-save \ cmd/index/operator/index-operator \ @@ -85,6 +86,10 @@ cmd/index/job/deletion/index-deletion: $(eval CGO_ENABLED = 0) $(call go-build,index/job/deletion,,-static,,,$@) +cmd/index/job/exportation/index-exportation: + $(eval CGO_ENABLED = 0) + $(call go-build,index/job/exportation,,-static,,,$@) + cmd/index/job/save/index-save: $(eval CGO_ENABLED = 0) $(call go-build,index/job/save,,-static,,,$@) @@ -134,6 +139,7 @@ binary/build/zip: \ artifacts/vald-index-correction-$(GOOS)-$(GOARCH).zip \ artifacts/vald-index-creation-$(GOOS)-$(GOARCH).zip \ artifacts/vald-index-deletion-$(GOOS)-$(GOARCH).zip \ + artifacts/vald-index-exportation-$(GOOS)-$(GOARCH).zip \ artifacts/vald-index-operator-$(GOOS)-$(GOARCH).zip \ artifacts/vald-index-save-$(GOOS)-$(GOARCH).zip \ artifacts/vald-lb-gateway-$(GOOS)-$(GOARCH).zip \ @@ -197,6 +203,10 @@ artifacts/vald-index-deletion-$(GOOS)-$(GOARCH).zip: cmd/index/job/deletion/inde $(call mkdir, $(dir $@)) zip 
--junk-paths $@ $< +artifacts/vald-index-exportation-$(GOOS)-$(GOARCH).zip: cmd/index/job/exportation/index-exportation + $(call mkdir, $(dir $@)) + zip --junk-paths $@ $< + artifacts/vald-index-save-$(GOOS)-$(GOARCH).zip: cmd/index/job/save/index-save $(call mkdir, $(dir $@)) zip --junk-paths $@ $< diff --git a/Makefile.d/docker.mk b/Makefile.d/docker.mk index e4bb5b385d..04ed277034 100644 --- a/Makefile.d/docker.mk +++ b/Makefile.d/docker.mk @@ -37,6 +37,7 @@ docker/build: \ docker/build/index-correction \ docker/build/index-creation \ docker/build/index-deletion \ + docker/build/index-exportation \ docker/build/index-operator \ docker/build/index-save \ docker/build/loadtest \ @@ -65,6 +66,7 @@ docker/xpanes/build: docker/build/index-correction \ docker/build/index-creation \ docker/build/index-deletion \ + docker/build/index-exportation \ docker/build/index-operator \ docker/build/index-save \ docker/build/loadtest \ @@ -354,6 +356,17 @@ docker/build/index-deletion: IMAGE=$(INDEX_DELETION_IMAGE) \ docker/build/image +.PHONY: docker/name/index-exportation +docker/name/index-exportation: + @echo "$(ORG)/$(INDEX_EXPORTATION_IMAGE)" + +.PHONY: docker/build/index-exportation +## build index-exportation image +docker/build/index-exportation: + @make DOCKERFILE="$(ROOTDIR)/dockers/index/job/exportation/Dockerfile" \ + IMAGE=$(INDEX_EXPORTATION_IMAGE) \ + docker/build/image + .PHONY: docker/name/index-operator docker/name/index-operator: @echo "$(ORG)/$(INDEX_OPERATOR_IMAGE)" diff --git a/cmd/index/job/exportation/main.go b/cmd/index/job/exportation/main.go new file mode 100644 index 0000000000..c7493321ca --- /dev/null +++ b/cmd/index/job/exportation/main.go @@ -0,0 +1,59 @@ +// Copyright (C) 2019-2025 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package main + +import ( + "context" + "log" + + "github.com/vdaas/vald/internal/errors" + "github.com/vdaas/vald/internal/info" + "github.com/vdaas/vald/internal/runner" + "github.com/vdaas/vald/internal/safety" + "github.com/vdaas/vald/pkg/index/job/exportation/config" + "github.com/vdaas/vald/pkg/index/job/exportation/usecase" +) + +const ( + maxVersion = "v0.0.10" // passed to runner.WithVersion together with info.Version and minVersion + minVersion = "v0.0.0" + name = "index exportation job" // used as the runner name and inside the config-load error message +) +// main runs the index exportation job through runner.Do, using config.NewConfig as the config loader and usecase.New as the daemon initializer; any returned error is fatal. +func main() { + if err := safety.RecoverFunc(func() error { + return runner.Do( + context.Background(), + runner.WithName(name), + runner.WithVersion(info.Version, maxVersion, minVersion), + runner.WithConfigLoader(func(path string) (any, *config.GlobalConfig, error) { + cfg, err := config.NewConfig(path) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to load "+name+"'s configuration") + } + return cfg, &cfg.GlobalConfig, nil + }), + runner.WithDaemonInitializer(func(cfg any) (runner.Runner, error) { + c, ok := cfg.(*config.Data) // presumably the value produced by the config loader above (confirm against runner.Do); any other type is rejected below + if !ok { + return nil, errors.ErrInvalidConfig + } + return usecase.New(c) + }), + ) + })(); err != nil { + log.Fatal(err, info.Get()) // log.Fatal exits the process, so the return below is never reached + return + } +} diff --git a/cmd/index/job/exportation/sample.yaml b/cmd/index/job/exportation/sample.yaml new file mode 100644 index 0000000000..c397e4b710 --- /dev/null +++ b/cmd/index/job/exportation/sample.yaml @@ -0,0 +1,193 @@ +# +# Copyright (C) 2019-2025 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +version: v0.0.0 +time_zone: JST +logging: + format: raw + level: info + logger: glg +server_config: + servers: + - name: grpc + host: 0.0.0.0 + port: 8081 + grpc: + bidirectional_stream_concurrency: 20 + connection_timeout: "" + header_table_size: 0 + initial_conn_window_size: 0 + initial_window_size: 0 + interceptors: [] + keepalive: + max_conn_age: "" + max_conn_age_grace: "" + max_conn_idle: "" + time: "" + timeout: "" + max_header_list_size: 0 + max_receive_message_size: 0 + max_send_message_size: 0 + read_buffer_size: 0 + write_buffer_size: 0 + mode: GRPC + probe_wait_time: 3s + restart: true + health_check_servers: + - name: readiness + host: 0.0.0.0 + port: 3001 + http: + handler_timeout: "" + idle_timeout: "" + read_header_timeout: "" + read_timeout: "" + shutdown_duration: 0s + write_timeout: "" + mode: "" + probe_wait_time: 3s + metrics_servers: + startup_strategy: + - grpc + - readiness + full_shutdown_duration: 600s + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + key: /path/to/key +exporter: + concurrency: 1 + kvs_background_sync_interval: 5s + kvs_background_compaction_interval: 5s + index_path: "/var/export/index" + gateway: + addrs: + - vald-lb-gateway.default.svc.cluster.local:8081 + health_check_duration: "1s" + connection_pool: + enable_dns_resolver: true + enable_rebalance: true + old_conn_close_duration: 2m + rebalance_duration: 30m + size: 3 + backoff: + backoff_factor: 1.1 + backoff_time_limit: 5s + enable_error_log: true + initial_duration: 5ms + jitter_limit: 100ms + maximum_duration: 5s + retry_count: 100 + 
circuit_breaker: + closed_error_rate: 0.7 + closed_refresh_timeout: 10s + half_open_error_rate: 0.5 + min_samples: 1000 + open_timeout: 1s + call_option: + content_subtype: "" + max_recv_msg_size: 0 + max_retry_rpc_buffer_size: 0 + max_send_msg_size: 0 + wait_for_ready: true + dial_option: + authority: "" + backoff_base_delay: 1s + backoff_jitter: 0.2 + backoff_max_delay: 120s + backoff_multiplier: 1.6 + disable_retry: false + enable_backoff: false + idle_timeout: 1h + initial_connection_window_size: 2097152 + initial_window_size: 1048576 + insecure: true + interceptors: [] + keepalive: + permit_without_stream: false + time: "" + timeout: 30s + max_call_attempts: 0 + max_header_list_size: 0 + max_msg_size: 0 + min_connection_timeout: 20s + net: + dialer: + dual_stack_enabled: true + keepalive: "" + timeout: "" + dns: + cache_enabled: true + cache_expiration: 1h + refresh_duration: 30m + socket_option: + ip_recover_destination_addr: false + ip_transparent: false + reuse_addr: true + reuse_port: true + tcp_cork: false + tcp_defer_accept: false + tcp_fast_open: false + tcp_no_delay: false + tcp_quick_ack: false + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + insecure_skip_verify: false + key: /path/to/key + read_buffer_size: 0 + shared_write_buffer: false + timeout: "" + user_agent: Vald-gRPC + write_buffer_size: 0 + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + insecure_skip_verify: false + key: /path/to/key +observability: + enabled: false + otlp: + collector_endpoint: "otel-collector.monitoring.svc.cluster.local:4317" + trace_batch_timeout: "1s" + trace_export_timeout: "1m" + trace_max_export_batch_size: 1024 + trace_max_queue_size: 256 + metrics_export_interval: "1s" + metrics_export_timeout: "1m" + attribute: + namespace: "_MY_POD_NAMESPACE_" + pod_name: "_MY_POD_NAME_" + node_name: "_MY_NODE_NAME_" + service_name: "vald-index-exportation" + metrics: + enable_cgo: true + enable_goroutine: true + enable_memory: true +
enable_version_info: true + version_info_labels: + - vald_version + - server_name + - git_commit + - build_time + - go_version + - go_os + - go_arch + - algorithm_info + trace: + enabled: true diff --git a/dockers/index/job/exportation/Dockerfile b/dockers/index/job/exportation/Dockerfile new file mode 100644 index 0000000000..2ff5f582a6 --- /dev/null +++ b/dockers/index/job/exportation/Dockerfile @@ -0,0 +1,89 @@ +# syntax = docker/dockerfile:latest +# check=error=true +# +# Copyright (C) 2019-2025 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# DO_NOT_EDIT this Dockerfile is generated by https://github.com/vdaas/vald/blob/main/hack/docker/gen/main.go +ARG UPX_OPTIONS=-9 +# skipcq: DOK-DL3026,DOK-DL3007 +FROM ghcr.io/vdaas/vald/vald-buildbase:nightly AS builder +LABEL maintainer="vdaas.org vald team " +# skipcq: DOK-DL3002 +USER root:root +ARG TARGETARCH +ARG TARGETOS +ARG GO_VERSION +ARG RUST_VERSION +ENV APP_NAME=index-exportation +ENV DEBIAN_FRONTEND=noninteractive +ENV GO111MODULE=on +ENV GOPATH=/go +ENV GOROOT=/opt/go +ENV HOME=/root +ENV INITRD=No +ENV LANG=en_US.UTF-8 +ENV LANGUAGE=en_US.UTF-8 +ENV LC_ALL=en_US.UTF-8 +ENV ORG=vdaas +ENV PKG=index/job/exportation +ENV REPO=vald +ENV TZ=Etc/UTC +ENV USER=root +ENV PATH=${GOPATH}/bin:${GOROOT}/bin:/usr/local/bin:${PATH} +WORKDIR ${GOPATH}/src/github.com/${ORG}/${REPO} +SHELL ["/bin/bash", "-o", "pipefail", "-c"] +#skipcq: DOK-W1001, DOK-SC2046, DOK-SC2086, DOK-DL3008 +RUN --mount=type=bind,target=.,rw \ + --mount=type=tmpfs,target=/tmp \ + --mount=type=cache,target=/var/lib/apt,sharing=locked,id=${APP_NAME} \ + --mount=type=cache,target=/var/cache/apt,sharing=locked,id=${APP_NAME} \ + --mount=type=cache,target="${GOPATH}/pkg",id="go-build-${TARGETARCH}" \ + --mount=type=cache,target="${HOME}/.cache/go-build",id="go-build-${TARGETARCH}" \ + --mount=type=tmpfs,target="${GOPATH}/src" \ + set -ex \ + && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \ + && echo 'APT::Install-Recommends "false";' > /etc/apt/apt.conf.d/no-install-recommends \ + && apt-get clean \ + && apt-get update -y \ + && apt-get upgrade -y \ + && apt-get install -y --no-install-recommends --fix-missing \ + build-essential \ + ca-certificates \ + curl \ + tzdata \ + locales \ + git \ + && ldconfig \ + && echo "${LANG} UTF-8" > /etc/locale.gen \ + && ln -fs /usr/share/zoneinfo/${TZ} /etc/localtime \ + && locale-gen ${LANGUAGE} \ + && update-locale LANG=${LANGUAGE} \ + && dpkg-reconfigure -f noninteractive tzdata \ + && apt-get clean \ + 
&& apt-get autoclean -y \ + && apt-get autoremove -y \ + && make GOPATH="${GOPATH}" GOROOT="${GOROOT}" GO_VERSION="${GO_VERSION}" go/install \ + && make GOPATH="${GOPATH}" GOROOT="${GOROOT}" GO_VERSION="${GO_VERSION}" go/download \ + && make GOARCH="${TARGETARCH}" GOOS="${TARGETOS}" REPO="${ORG}" NAME="${REPO}" cmd/${PKG}/${APP_NAME} \ + && mv "cmd/${PKG}/${APP_NAME}" "/usr/bin/${APP_NAME}" +# skipcq: DOK-DL3026,DOK-DL3007 +FROM gcr.io/distroless/static:nonroot +LABEL maintainer="vdaas.org vald team " +COPY --from=builder /usr/bin/index-exportation /usr/bin/index-exportation +COPY cmd/index/job/exportation/sample.yaml /etc/server/config.yaml +# skipcq: DOK-DL3002 +USER nonroot:nonroot +ENTRYPOINT ["/usr/bin/index-exportation"] diff --git a/hack/docker/gen/main.go b/hack/docker/gen/main.go index 811f7d12ff..d90242ba7e 100644 --- a/hack/docker/gen/main.go +++ b/hack/docker/gen/main.go @@ -735,6 +735,10 @@ func main() { AppName: "index-deletion", PackageDir: "index/job/deletion", }, + "vald-index-exportation": { + AppName: "index-exportation", + PackageDir: "index/job/exportation", + }, "vald-readreplica-rotate": { AppName: "readreplica-rotate", PackageDir: "index/job/readreplica/rotate", diff --git a/internal/config/index_exporter.go b/internal/config/index_exporter.go new file mode 100644 index 0000000000..5065392df1 --- /dev/null +++ b/internal/config/index_exporter.go @@ -0,0 +1,43 @@ +// Copyright (C) 2019-2025 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +package config + +// IndexExporter represents the configurations for index exportation. +type IndexExporter struct { + // Concurrency represents the concurrency of the index exportation. + Concurrency int `json:"concurrency" yaml:"concurrency"` + + // KVSBackgroundSyncInterval represents the interval of the kvs background sync as a duration string (e.g. "5s"). + KVSBackgroundSyncInterval string `json:"kvs_background_sync_interval" yaml:"kvs_background_sync_interval"` + + // KVSBackgroundCompactionInterval represents the interval of the kvs background compaction as a duration string (e.g. "5s"). + KVSBackgroundCompactionInterval string `json:"kvs_background_compaction_interval" yaml:"kvs_background_compaction_interval"` + + // IndexPath represents the export index file path. + IndexPath string `json:"index_path,omitempty" yaml:"index_path"` + + // Gateway represents the gRPC client configuration of the gateway service. + Gateway *GRPCClient `json:"gateway" yaml:"gateway"` +} +// Bind resolves every string field through GetActualValue, binds Gateway when it is set, and returns the receiver. +func (e *IndexExporter) Bind() *IndexExporter { + e.KVSBackgroundCompactionInterval = GetActualValue(e.KVSBackgroundCompactionInterval) + e.KVSBackgroundSyncInterval = GetActualValue(e.KVSBackgroundSyncInterval) + e.IndexPath = GetActualValue(e.IndexPath) + + if e.Gateway != nil { + e.Gateway = e.Gateway.Bind() + } + return e +} diff --git a/k8s/index/job/exportation/configmap.yaml b/k8s/index/job/exportation/configmap.yaml new file mode 100644 index 0000000000..6e9d81a0f1 --- /dev/null +++ b/k8s/index/job/exportation/configmap.yaml @@ -0,0 +1,211 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: vald-index-exportation-config + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.16 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.16 + app.kubernetes.io/component: vald-index-exportation +data: + config.yaml: | + --- + version: v0.0.0 + time_zone: JST + logging: + format: raw + level: info + logger: glg
+ server_config: + servers: + - name: grpc + host: 0.0.0.0 + port: 8081 + grpc: + bidirectional_stream_concurrency: 20 + connection_timeout: "" + enable_admin: true + enable_channelz: true + enable_reflection: true + header_table_size: 0 + initial_conn_window_size: 2097152 + initial_window_size: 1048576 + interceptors: + - RecoverInterceptor + keepalive: + max_conn_age: "" + max_conn_age_grace: "" + max_conn_idle: "" + min_time: 10m + permit_without_stream: false + time: 3h + timeout: 60s + max_concurrent_streams: 0 + max_header_list_size: 0 + max_receive_message_size: 0 + max_send_message_size: 0 + num_stream_workers: 0 + read_buffer_size: 0 + shared_write_buffer: false + wait_for_handlers: true + write_buffer_size: 0 + socket_option: + ip_recover_destination_addr: false + ip_transparent: false + reuse_addr: true + reuse_port: true + tcp_cork: false + tcp_defer_accept: false + tcp_fast_open: false + tcp_no_delay: false + tcp_quick_ack: false + socket_path: "" + mode: GRPC + health_check_servers: + - name: readiness + host: 0.0.0.0 + port: 3001 + http: + handler_timeout: "" + idle_timeout: "" + read_header_timeout: "" + read_timeout: "" + shutdown_duration: 0s + write_timeout: "" + mode: "" + probe_wait_time: 3s + metrics_servers: + startup_strategy: + - grpc + - readiness + full_shutdown_duration: 600s + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + key: /path/to/key + exporter: + concurrency: 20 + kvs_background_sync_interval: 5s + kvs_background_compaction_interval: 5s + index_path: "/data/export/index" + gateway: + addrs: + - vald-lb-gateway.default.svc.cluster.local:8081 + health_check_duration: "1s" + connection_pool: + enable_dns_resolver: true + enable_rebalance: true + old_conn_close_duration: 2m + rebalance_duration: 30m + size: 3 + backoff: + backoff_factor: 1.1 + backoff_time_limit: 5s + enable_error_log: true + initial_duration: 5ms + jitter_limit: 100ms + maximum_duration: 5s + retry_count: 100 + circuit_breaker: + closed_error_rate: 
0.7 + closed_refresh_timeout: 10s + half_open_error_rate: 0.5 + min_samples: 1000 + open_timeout: 1s + call_option: + content_subtype: "" + max_recv_msg_size: 0 + max_retry_rpc_buffer_size: 0 + max_send_msg_size: 0 + wait_for_ready: true + dial_option: + authority: "" + backoff_base_delay: 1s + backoff_jitter: 0.2 + backoff_max_delay: 120s + backoff_multiplier: 1.6 + disable_retry: false + enable_backoff: false + idle_timeout: 1h + initial_connection_window_size: 2097152 + initial_window_size: 1048576 + insecure: true + interceptors: [] + keepalive: + permit_without_stream: false + time: "" + timeout: 30s + max_call_attempts: 0 + max_header_list_size: 0 + max_msg_size: 0 + min_connection_timeout: 20s + net: + dialer: + dual_stack_enabled: true + keepalive: "" + timeout: "" + dns: + cache_enabled: true + cache_expiration: 1h + refresh_duration: 30m + socket_option: + ip_recover_destination_addr: false + ip_transparent: false + reuse_addr: true + reuse_port: true + tcp_cork: false + tcp_defer_accept: false + tcp_fast_open: false + tcp_no_delay: false + tcp_quick_ack: false + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + insecure_skip_verify: false + key: /path/to/key + read_buffer_size: 0 + shared_write_buffer: false + timeout: "" + user_agent: Vald-gRPC + write_buffer_size: 0 + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + insecure_skip_verify: false + key: /path/to/key + observability: + enabled: false + otlp: + collector_endpoint: "otel-collector.monitoring.svc.cluster.local:4317" + trace_batch_timeout: "1s" + trace_export_timeout: "1m" + trace_max_export_batch_size: 1024 + trace_max_queue_size: 256 + metrics_export_interval: "1s" + metrics_export_timeout: "1m" + attribute: + namespace: "_MY_POD_NAMESPACE_" + pod_name: "_MY_POD_NAME_" + node_name: "_MY_NODE_NAME_" + service_name: "vald-index-exportation" + metrics: + enable_cgo: true + enable_goroutine: true + enable_memory: true + enable_version_info: true + 
version_info_labels: + - vald_version + - server_name + - git_commit + - build_time + - go_version + - go_os + - go_arch + - algorithm_info + trace: + enabled: true diff --git a/k8s/index/job/exportation/cronjob.yaml b/k8s/index/job/exportation/cronjob.yaml new file mode 100644 index 0000000000..ec93b16074 --- /dev/null +++ b/k8s/index/job/exportation/cronjob.yaml @@ -0,0 +1,139 @@ +# +# Copyright (C) 2019-2025 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vald-index-exportation + labels: + app: vald-index-exportation + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.16 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/component: vald-index-exportation + app.kubernetes.io/version: v1.7.16 +spec: + schedule: "0 * * * *" + concurrencyPolicy: Forbid + suspend: true + startingDeadlineSeconds: 43200 + jobTemplate: + spec: + ttlSecondsAfterFinished: 86400 + template: + metadata: + labels: + app: vald-index-exportation + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.16 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/component: vald-index-exportation + app.kubernetes.io/version: v1.7.16 + annotations: + pyroscope.io/scrape: "true" + pyroscope.io/application-name: vald-index-exportation + pyroscope.io/profile-cpu-enabled: "true" + pyroscope.io/profile-mem-enabled: "true" + 
pyroscope.io/port: "6060" + spec: + initContainers: + - name: wait-for-gateway-lb + image: busybox:stable + imagePullPolicy: Always + command: + - /bin/sh + - -e + - -c + - | + until [ "$(wget --server-response --spider --quiet http://vald-lb-gateway.default.svc.cluster.local:3001/readiness 2>&1 | awk 'NR==1{print $2}')" == "200" ]; do + echo "waiting for gateway-lb to be ready..." + sleep 2; + done + containers: + - name: vald-index-exportation + image: "vdaas/vald-index-exportation:nightly" + imagePullPolicy: Never + volumeMounts: + - name: vald-index-exportation-config + mountPath: /etc/server/ + - name: export-pvc + mountPath: /data + livenessProbe: + failureThreshold: 2 + httpGet: + path: /liveness + port: liveness + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 3 + successThreshold: 1 + timeoutSeconds: 2 + readinessProbe: + failureThreshold: 2 + httpGet: + path: /readiness + port: readiness + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 3 + successThreshold: 1 + timeoutSeconds: 2 + startupProbe: + failureThreshold: 30 + httpGet: + path: /liveness + port: liveness + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 2 + ports: + - name: liveness + protocol: TCP + containerPort: 3000 + - name: readiness + protocol: TCP + containerPort: 3001 + - name: grpc + protocol: TCP + containerPort: 8081 + - name: pprof + protocol: TCP + containerPort: 6060 + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + restartPolicy: OnFailure + volumes: + - name: vald-index-exportation-config + configMap: + defaultMode: 420 + name: vald-index-exportation-config + - name: export-pvc + persistentVolumeClaim: + claimName: export-pvc diff --git a/k8s/index/job/exportation/pvc.yaml b/k8s/index/job/exportation/pvc.yaml new file mode 
100644 index 0000000000..36d0c0918e --- /dev/null +++ b/k8s/index/job/exportation/pvc.yaml @@ -0,0 +1,27 @@ +# +# Copyright (C) 2019-2025 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: export-pvc + namespace: default +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: local-path diff --git a/pkg/index/job/exportation/config/config.go b/pkg/index/job/exportation/config/config.go new file mode 100644 index 0000000000..062b3592ee --- /dev/null +++ b/pkg/index/job/exportation/config/config.go @@ -0,0 +1,66 @@ +// Copyright (C) 2019-2025 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package config + +import ( + "github.com/vdaas/vald/internal/config" + "github.com/vdaas/vald/internal/errors" +) + +// GlobalConfig is a type alias of config.GlobalConfig representing application base configurations. 
+type GlobalConfig = config.GlobalConfig
+
+// Data represents the application configurations.
+type Data struct {
+	// GlobalConfig represents application base configurations.
+	config.GlobalConfig `json:",inline" yaml:",inline"`
+
+	// Server represents all server configurations.
+	Server *config.Servers `json:"server_config" yaml:"server_config"`
+
+	// Observability represents observability configurations.
+	Observability *config.Observability `json:"observability" yaml:"observability"`
+
+	// Exporter represents index exportation job configurations.
+	Exporter *config.IndexExporter `json:"exporter" yaml:"exporter"`
+}
+
+// NewConfig loads configurations from the file at path and binds every section.
+// It returns errors.ErrInvalidConfig when the decoded configuration, its server
+// section or its exporter section is missing. A missing observability section is
+// replaced by a bound default.
+func NewConfig(path string) (cfg *Data, err error) {
+	cfg = new(Data)
+
+	if err = config.Read(path, &cfg); err != nil {
+		return nil, err
+	}
+
+	// config.Read receives the address of cfg, so guard against it being reset to nil.
+	if cfg == nil {
+		return nil, errors.ErrInvalidConfig
+	}
+	cfg.Bind()
+
+	// The server section is required: it is handed to the server starter by the
+	// usecase layer, so bind and validate it like the other sections.
+	if cfg.Server == nil {
+		return nil, errors.ErrInvalidConfig
+	}
+	_ = cfg.Server.Bind()
+
+	if cfg.Observability != nil {
+		_ = cfg.Observability.Bind()
+	} else {
+		cfg.Observability = new(config.Observability).Bind()
+	}
+
+	if cfg.Exporter == nil {
+		return nil, errors.ErrInvalidConfig
+	}
+	cfg.Exporter = cfg.Exporter.Bind()
+
+	return cfg, nil
+}
diff --git a/pkg/index/job/exportation/service/exporter.go b/pkg/index/job/exportation/service/exporter.go
new file mode 100644
index 0000000000..b442c60723
--- /dev/null
+++ b/pkg/index/job/exportation/service/exporter.go
@@ -0,0 +1,222 @@
+// Copyright (C) 2019-2025 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package service
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"reflect"
+	"strconv"
+	"time"
+
+	"github.com/vdaas/vald/apis/grpc/v1/payload"
+	"github.com/vdaas/vald/apis/grpc/v1/vald"
+	vc "github.com/vdaas/vald/internal/client/v1/client/vald"
+	"github.com/vdaas/vald/internal/db/kvs/pogreb"
+	"github.com/vdaas/vald/internal/errors"
+	"github.com/vdaas/vald/internal/file"
+	"github.com/vdaas/vald/internal/log"
+	igrpc "github.com/vdaas/vald/internal/net/grpc"
+	"github.com/vdaas/vald/internal/observability/trace"
+	"github.com/vdaas/vald/internal/safety"
+	"github.com/vdaas/vald/internal/sync/errgroup"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+)
+
+const (
+	apiName        = "vald/index/job/export"
+	grpcMethodName = "vald.v1.StreamListObject/" + vald.StreamListObjectRPCName
+)
+
+// Exporter represents an interface for exporting.
+type Exporter interface {
+	StartClient(ctx context.Context) (<-chan error, error)
+	Start(ctx context.Context) error
+	PreStop(ctx context.Context) error
+}
+
+// export is the Exporter implementation. It reads vectors from the gateway
+// client and keeps the newest copy of each one in the storedVector kvs.
+type export struct {
+	eg           errgroup.Group
+	gateway      vc.Client
+	storedVector pogreb.DB
+
+	streamListConcurrency        int
+	backgroundSyncInterval       time.Duration
+	backgroundCompactionInterval time.Duration
+	indexPath                    string
+}
+
+// New returns Exporter object if no error occurs.
+// It applies defaultOpts followed by opts, creates the indexPath directory and
+// opens a kvs file named "<unix seconds>.db" inside it. A failing option aborts
+// construction only when it reports a critical option error; any other option
+// failure is logged as a warning.
+func New(opts ...Option) (Exporter, error) {
+	e := new(export)
+	for _, opt := range append(defaultOpts, opts...) {
+		if err := opt(e); err != nil {
+			oerr := errors.ErrOptionFailed(err, reflect.ValueOf(opt))
+			e := &errors.ErrCriticalOption{}
+			if errors.As(oerr, &e) {
+				log.Error(err)
+				return nil, oerr
+			}
+			log.Warn(oerr)
+		}
+	}
+
+	if err := file.MkdirAll(e.indexPath, os.ModePerm); err != nil {
+		log.Errorf("failed to create dir %s", e.indexPath)
+		return nil, errors.Wrap(err, "failed to create index path directory")
+	}
+
+	path := file.Join(e.indexPath, fmt.Sprintf("%s.db", strconv.FormatInt(time.Now().Unix(), 10)))
+	db, err := pogreb.New(pogreb.WithPath(path),
+		pogreb.WithBackgroundCompactionInterval(e.backgroundCompactionInterval),
+		pogreb.WithBackgroundSyncInterval(e.backgroundSyncInterval))
+	if err != nil {
+		log.Errorf("failed to open checked List kvs DB %s", path)
+		return nil, err
+	}
+	e.storedVector = db
+	return e, nil
+}
+
+// StartClient starts the gRPC client.
+// Non-nil errors reported by the gateway client are forwarded to the returned
+// channel, which is closed when the forwarding goroutine exits.
+func (e *export) StartClient(ctx context.Context) (<-chan error, error) {
+	ech := make(chan error, 1)
+	gch, err := e.gateway.Start(ctx)
+	if err != nil {
+		return nil, err
+	}
+	e.eg.Go(safety.RecoverFunc(func() (err error) {
+		defer close(ech)
+		for {
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case err = <-gch:
+			}
+			if err != nil {
+				select {
+				case <-ctx.Done():
+					return ctx.Err()
+				case ech <- err:
+				}
+			}
+		}
+	}))
+	return ech, nil
+}
+
+// Start runs a single export and returns its result.
+func (e *export) Start(ctx context.Context) error {
+	return e.doExportIndex(ctx)
+}
+
+// doExportIndex streams every object from the first connected gateway with
+// StreamListObject and stores a vector in the kvs when it is not stored yet or
+// when it has a newer timestamp than the stored copy.
+// It returns nil only when the stream ended with io.EOF and every worker
+// succeeded; otherwise it returns the worker error or the cancellation cause.
+func (e *export) doExportIndex(
+	ctx context.Context,
+) (errs error) {
+	ctx, span := trace.StartSpan(igrpc.WrapGRPCMethod(ctx, grpcMethodName), apiName+"/service/index.doExportIndex")
+	defer func() {
+		if span != nil {
+			span.End()
+		}
+	}()
+
+	emptyReq := new(payload.Object_List_Request)
+
+	eg, egctx := errgroup.WithContext(ctx)
+	eg.SetLimit(e.streamListConcurrency)
+	ctx, cancel := context.WithCancelCause(egctx)
+	// Release the derived context on every return path. A cause recorded by an
+	// earlier cancel call is kept.
+	defer cancel(nil)
+
+	gatewayAddrs := e.gateway.GRPCClient().ConnectedAddrs()
+	if len(gatewayAddrs) == 0 {
+		log.Errorf("Active gateway is not found: %v", ctx.Err())
+		return errors.New("no active gateways available")
+	}
+
+	conn, err := grpc.NewClient(gatewayAddrs[0], grpc.WithTransportCredentials(insecure.NewCredentials()))
+	if err != nil {
+		return err
+	}
+	// The connection is created for this run only, so close it when the run ends.
+	defer func() {
+		if cerr := conn.Close(); cerr != nil {
+			log.Warnf("failed to close gateway connection: %v", cerr)
+		}
+	}()
+
+	vcClient := vc.NewValdClient(conn)
+	grpcCallOpts := []grpc.CallOption{
+		grpc.WaitForReady(true),
+	}
+	stream, err := vcClient.StreamListObject(ctx, emptyReq, grpcCallOpts...)
+	if err != nil {
+		return err
+	}
+	if stream == nil {
+		// A nil stream without an error must not be reported as a finished export.
+		return errors.New("StreamListObject returned nil stream")
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			cause := context.Cause(ctx)
+			// Wait for in-flight workers before reporting the outcome.
+			if err := eg.Wait(); err != nil {
+				log.Errorf("exporter returned error status errgroup returned error: %v", err)
+				return err
+			}
+			// io.EOF is the cause recorded when the stream was fully consumed.
+			if !errors.Is(cause, io.EOF) {
+				log.Errorf("context canceled due to %v", cause)
+				return cause
+			}
+			log.Infof("exporter finished")
+			return nil
+		default:
+			res, err := stream.Recv()
+			if err != nil {
+				// Record why the stream stopped; the next iteration takes the Done branch.
+				if errors.Is(err, io.EOF) {
+					cancel(io.EOF)
+				} else {
+					cancel(errors.ErrStreamListObjectStreamFinishedUnexpectedly(err))
+				}
+			} else if res != nil && res.GetVector() != nil && res.GetVector().GetId() != "" {
+				eg.Go(safety.RecoverFunc(func() (err error) {
+					objVec := res.GetVector()
+					log.Infof("received object vector id: %s, timestamp: %d", objVec.GetId(), objVec.GetTimestamp())
+
+					storedBinary, ok, err := e.storedVector.Get(objVec.GetId())
+					if err != nil {
+						log.Errorf("failed to perform Get from stored vector kvs: %v", err)
+						return err
+					}
+
+					if ok {
+						var storedObjVec payload.Object_Vector
+						if err := storedObjVec.UnmarshalVT(storedBinary); err != nil {
+							log.Errorf("failed to Unmarshal proto to payload.Object_Vector: %v", err)
+							return err
+						}
+						// Keep the stored copy unless the received vector is strictly newer.
+						if storedObjVec.GetTimestamp() >= objVec.GetTimestamp() {
+							return nil
+						}
+					}
+
+					dAtA, err := objVec.MarshalVT()
+					if err != nil {
+						return err
+					}
+					if err := e.storedVector.Set(objVec.GetId(), dAtA); err != nil {
+						log.Errorf("failed to perform Set to stored vector kvs: %v", err)
+						return err
+					}
+					return nil
+				}))
+			}
+		}
+	}
+}
+
+// PreStop closes the kvs that holds the exported vectors, passing false as the
+// remove flag.
+func (e *export) PreStop(ctx context.Context) error {
+	return e.storedVector.Close(false)
+}
diff --git a/pkg/index/job/exportation/service/options.go b/pkg/index/job/exportation/service/options.go
new file mode 100644
index 0000000000..7ac8b08b02
--- /dev/null
+++ b/pkg/index/job/exportation/service/options.go
@@ -0,0 +1,104 @@
+// Copyright (C) 2019-2025 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package service
+
+import (
+	"github.com/vdaas/vald/internal/client/v1/client/vald"
+	"github.com/vdaas/vald/internal/errors"
+	"github.com/vdaas/vald/internal/sync/errgroup"
+	"github.com/vdaas/vald/internal/timeutil"
+)
+
+// Option represents the functional option for export.
+type Option func(_ *export) error
+
+// defaultOpts holds the options that New applies before the caller's options.
+var defaultOpts = []Option{
+	WithStreamListConcurrency(200),
+	WithKVSSyncInterval("5s"),
+	WithKVSCompactionInterval("5s"),
+	WithIndexPath("/var/export/index"),
+	WithErrGroup(errgroup.Get()),
+}
+
+// WithStreamListConcurrency returns Option that sets streamListConcurrency.
+// A value of zero or less is rejected with an invalid option error.
+func WithStreamListConcurrency(num int) Option {
+	return func(e *export) error {
+		if num <= 0 {
+			return errors.NewErrInvalidOption("streamListConcurrency", num)
+		}
+		e.streamListConcurrency = num
+		return nil
+	}
+}
+
+// WithKVSSyncInterval returns Option that sets interval for background file sync.
+func WithKVSSyncInterval(dur string) Option {
+	return func(target *export) error {
+		// An empty duration string keeps the interval that is already set.
+		if len(dur) == 0 {
+			return nil
+		}
+		interval, perr := timeutil.Parse(dur)
+		if perr == nil {
+			target.backgroundSyncInterval = interval
+		}
+		return perr
+	}
+}
+
+// WithKVSCompactionInterval returns Option that sets the interval for background
+// file compaction. An empty string is ignored and a parse failure is returned as is.
+func WithKVSCompactionInterval(dur string) Option {
+	return func(target *export) error {
+		// An empty duration string keeps the interval that is already set.
+		if len(dur) == 0 {
+			return nil
+		}
+		interval, perr := timeutil.Parse(dur)
+		if perr == nil {
+			target.backgroundCompactionInterval = interval
+		}
+		return perr
+	}
+}
+
+// WithIndexPath returns Option that sets indexPath.
+// An empty path is rejected with an invalid option error.
+func WithIndexPath(path string) Option {
+	return func(target *export) error {
+		if path != "" {
+			target.indexPath = path
+			return nil
+		}
+		return errors.NewErrInvalidOption("indexPath", path)
+	}
+}
+
+// WithGateway returns Option that sets the gateway client.
+// A nil client is rejected with a critical option error.
+func WithGateway(client vald.Client) Option {
+	return func(target *export) error {
+		if client != nil {
+			target.gateway = client
+			return nil
+		}
+		return errors.NewErrCriticalOption("gateway", client)
+	}
+}
+
+// WithErrGroup returns Option that sets the errgroup.
+// A nil errgroup leaves the current one untouched.
+func WithErrGroup(eg errgroup.Group) Option {
+	return func(target *export) error {
+		if eg == nil {
+			return nil
+		}
+		target.eg = eg
+		return nil
+	}
+}
diff --git a/pkg/index/job/exportation/usecase/exportation.go b/pkg/index/job/exportation/usecase/exportation.go
new file mode 100644
index 0000000000..ee2b764c00
--- /dev/null
+++ b/pkg/index/job/exportation/usecase/exportation.go
@@ -0,0 +1,192 @@
+// Copyright (C) 2019-2025 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package usecase
+
+import (
+	"context"
+	"os"
+	"syscall"
+
+	"github.com/vdaas/vald/internal/client/v1/client/vald"
+	iconf "github.com/vdaas/vald/internal/config"
+	"github.com/vdaas/vald/internal/errors"
+	"github.com/vdaas/vald/internal/log"
+
+	"github.com/vdaas/vald/internal/net/grpc"
+	"github.com/vdaas/vald/internal/net/grpc/interceptor/server/recover"
+	"github.com/vdaas/vald/internal/observability"
+	"github.com/vdaas/vald/internal/runner"
+	"github.com/vdaas/vald/internal/safety"
+	"github.com/vdaas/vald/internal/servers/server"
+	"github.com/vdaas/vald/internal/servers/starter"
+	"github.com/vdaas/vald/internal/sync/errgroup"
+	"github.com/vdaas/vald/pkg/index/job/exportation/config"
+	"github.com/vdaas/vald/pkg/index/job/exportation/service"
+)
+
+// run holds the components that make up the index exportation job: the
+// errgroup its goroutines run on, the loaded configuration, the servers
+// created from that configuration, the exporter service, and the
+// observability component, which stays nil when observability is disabled.
+type run struct {
+	eg            errgroup.Group
+	cfg           *config.Data
+	observability observability.Observability
+	server        starter.Server
+	exporter      service.Exporter
+}
+
+// New returns Runner instance.
+func New(cfg *config.Data) (_ runner.Runner, err error) {
+	// Fill in the runner step by step; it is returned only when every step succeeds.
+	job := &run{
+		eg:  errgroup.Get(),
+		cfg: cfg,
+	}
+
+	clientOpts, err := cfg.Exporter.Gateway.Opts()
+	if err != nil {
+		return nil, err
+	}
+	// skipcq: CRT-D0001
+	clientOpts = append(clientOpts, grpc.WithErrGroup(job.eg))
+
+	gw, err := vald.New(vald.WithClient(grpc.New(clientOpts...)))
+	if err != nil {
+		return nil, err
+	}
+
+	job.exporter, err = service.New(
+		service.WithStreamListConcurrency(cfg.Exporter.Concurrency),
+		service.WithIndexPath(cfg.Exporter.IndexPath),
+		service.WithGateway(gw),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	// Observability is optional; the field stays nil when it is disabled.
+	if cfg.Observability.Enabled {
+		job.observability, err = observability.NewWithConfig(cfg.Observability)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	// Built once so every gRPC server receives the same recover interceptors.
+	recoverOpts := []server.Option{
+		server.WithGRPCOption(
+			grpc.ChainUnaryInterceptor(recover.RecoverInterceptor()),
+			grpc.ChainStreamInterceptor(recover.RecoverStreamInterceptor()),
+		),
+	}
+
+	// For health check and metrics
+	job.server, err = starter.New(
+		starter.WithConfig(cfg.Server),
+		starter.WithGRPC(func(_ *iconf.Server) []server.Option {
+			return recoverOpts
+		}),
+	)
+	if err != nil {
+		return nil, err
+	}
+
+	return job, nil
+}
+
+// PreStart runs before Start and delegates to the observability component's
+// PreStart when observability is configured; otherwise it does nothing.
+func (r *run) PreStart(ctx context.Context) error {
+	if r.observability == nil {
+		return nil
+	}
+	return r.observability.PreStart(ctx)
+}
+
+// Start is a method used to initiate an operation in the run, and it returns a channel for receiving errors
+// during the operation and an error representing any initialization errors.
+func (r *run) Start(ctx context.Context) (<-chan error, error) {
+	// Buffered for the three error sources merged below.
+	ech := make(chan error, 3)
+	// oech stays nil when observability is disabled; a nil channel never becomes
+	// ready in the select below.
+	var sech, oech <-chan error
+	if r.observability != nil {
+		oech = r.observability.Start(ctx)
+	}
+	sech = r.server.ListenAndServe(ctx)
+	cech, err := r.exporter.StartClient(ctx)
+	if err != nil {
+		close(ech)
+		return nil, err
+	}
+
+	// Run the export itself. Whatever the result, the deferred function signals
+	// SIGTERM to this process once the export returns so the job shuts down.
+	r.eg.Go(safety.RecoverFunc(func() (err error) {
+		defer func() {
+			p, err := os.FindProcess(os.Getpid())
+			if err != nil {
+				// using Fatal to avoid this process to be zombie
+				// skipcq: RVV-A0003
+				log.Fatalf("failed to find my pid to kill %v", err)
+				return
+			}
+			log.Info("sending SIGTERM to myself to stop this job")
+			if err := p.Signal(syscall.SIGTERM); err != nil {
+				log.Error(err)
+			}
+		}()
+		return r.exporter.Start(ctx)
+	}))
+
+	// Merge errors from observability, the servers and the gateway client into
+	// ech until ctx is done; ech is closed when this goroutine exits.
+	r.eg.Go(safety.RecoverFunc(func() (err error) {
+		defer close(ech)
+		for {
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case err = <-oech:
+			case err = <-sech:
+			case err = <-cech:
+			}
+			if err != nil {
+				// Forward the error unless ctx ends first, in which case both are returned.
+				select {
+				case <-ctx.Done():
+					return errors.Join(ctx.Err(), err)
+				case ech <- err:
+				}
+			}
+		}
+	}))
+	return ech, nil
+}
+
+// PreStop is a method called before execution of Stop.
+// It delegates to the exporter's PreStop.
+func (r *run) PreStop(ctx context.Context) error {
+	return r.exporter.PreStop(ctx)
+}
+
+// Stop is a method used to stop an operation in the run.
+// It stops observability (when configured) and shuts down the servers, joining
+// any errors from both steps into the returned error.
+func (r *run) Stop(ctx context.Context) (errs error) {
+	if r.observability != nil {
+		if err := r.observability.Stop(ctx); err != nil {
+			errs = errors.Join(errs, err)
+		}
+	}
+	if r.server != nil {
+		if err := r.server.Shutdown(ctx); err != nil {
+			errs = errors.Join(errs, err)
+		}
+	}
+	return errs
+}
+
+// PostStop is a method called after execution of Stop.
+// It has nothing to clean up and always returns nil.
+func (*run) PostStop(_ context.Context) error {
+	return nil
+}