From 9aeb887d9d40063c38841bb994b9e02458e89ee8 Mon Sep 17 00:00:00 2001 From: HighPon Date: Mon, 18 Nov 2024 04:17:07 +0000 Subject: [PATCH] [WIP] feat: Implement vald index export --- Makefile | 2 +- cmd/index/job/exportation/main.go | 37 ++- cmd/index/job/exportation/sample.yaml | 193 +++++++++++++++ dockers/index/job/exportation/Dockerfile | 1 + internal/config/index_exporter.go | 43 ++++ k8s/index/job/exportation/configmap.yaml | 211 +++++++++++++++++ k8s/index/job/exportation/cronjob.yaml | 151 ++++++++++++ k8s/index/job/exportation/pvc.yaml | 12 + pkg/index/job/exportation/config/config.go | 66 ++++++ pkg/index/job/exportation/service/exporter.go | 222 ++++++++++++++++++ pkg/index/job/exportation/service/options.go | 104 ++++++++ .../job/exportation/usecase/exportation.go | 183 +++++++++++++++ 12 files changed, 1222 insertions(+), 3 deletions(-) create mode 100644 cmd/index/job/exportation/sample.yaml create mode 100644 internal/config/index_exporter.go create mode 100644 k8s/index/job/exportation/configmap.yaml create mode 100644 k8s/index/job/exportation/cronjob.yaml create mode 100644 k8s/index/job/exportation/pvc.yaml create mode 100644 pkg/index/job/exportation/config/config.go create mode 100644 pkg/index/job/exportation/service/exporter.go create mode 100644 pkg/index/job/exportation/service/options.go create mode 100644 pkg/index/job/exportation/usecase/exportation.go diff --git a/Makefile b/Makefile index cfa14f91c2..947af15f19 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ HELM_OPERATOR_IMAGE = $(NAME)-helm-operator INDEX_CORRECTION_IMAGE = $(NAME)-index-correction INDEX_CREATION_IMAGE = $(NAME)-index-creation INDEX_DELETION_IMAGE = $(NAME)-index-deletion -INDEX_EXPORTATION_IMAGE = $(NAME)-index-exportion +INDEX_EXPORTATION_IMAGE = $(NAME)-index-exportation INDEX_IMPORTATION_IMAGE = $(NAME)-index-importation INDEX_OPERATOR_IMAGE = $(NAME)-index-operator INDEX_SAVE_IMAGE = $(NAME)-index-save diff --git a/cmd/index/job/exportation/main.go
b/cmd/index/job/exportation/main.go index c5553f225e..8c16dd224b 100644 --- a/cmd/index/job/exportation/main.go +++ b/cmd/index/job/exportation/main.go @@ -13,7 +13,17 @@ // limitations under the License. package main -import "fmt" +import ( + "context" + "log" + + "github.com/vdaas/vald/internal/errors" + "github.com/vdaas/vald/internal/info" + "github.com/vdaas/vald/internal/runner" + "github.com/vdaas/vald/internal/safety" + "github.com/vdaas/vald/pkg/index/job/exportation/config" + "github.com/vdaas/vald/pkg/index/job/exportation/usecase" +) const ( maxVersion = "v0.0.10" @@ -22,5 +32,28 @@ const ( ) func main() { - fmt.Println("hello world") + if err := safety.RecoverFunc(func() error { + return runner.Do( + context.Background(), + runner.WithName(name), + runner.WithVersion(info.Version, maxVersion, minVersion), + runner.WithConfigLoader(func(path string) (any, *config.GlobalConfig, error) { + cfg, err := config.NewConfig(path) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to load "+name+"'s configuration") + } + return cfg, &cfg.GlobalConfig, nil + }), + runner.WithDaemonInitializer(func(cfg any) (runner.Runner, error) { + c, ok := cfg.(*config.Data) + if !ok { + return nil, errors.ErrInvalidConfig + } + return usecase.New(c) + }), + ) + })(); err != nil { + log.Fatal(err, info.Get()) + return + } } diff --git a/cmd/index/job/exportation/sample.yaml b/cmd/index/job/exportation/sample.yaml new file mode 100644 index 0000000000..4ed586d02f --- /dev/null +++ b/cmd/index/job/exportation/sample.yaml @@ -0,0 +1,193 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +version: v0.0.0 +time_zone: JST +logging: + format: raw + level: info + logger: glg +server_config: + servers: + - name: grpc + host: 0.0.0.0 + port: 8081 + grpc: + bidirectional_stream_concurrency: 20 + connection_timeout: "" + header_table_size: 0 + initial_conn_window_size: 0 + initial_window_size: 0 + interceptors: [] + keepalive: + max_conn_age: "" + max_conn_age_grace: "" + max_conn_idle: "" + time: "" + timeout: "" + max_header_list_size: 0 + max_receive_message_size: 0 + max_send_message_size: 0 + read_buffer_size: 0 + write_buffer_size: 0 + mode: GRPC + probe_wait_time: 3s + restart: true + health_check_servers: + - name: readiness + host: 0.0.0.0 + port: 3001 + http: + handler_timeout: "" + idle_timeout: "" + read_header_timeout: "" + read_timeout: "" + shutdown_duration: 0s + write_timeout: "" + mode: "" + probe_wait_time: 3s + metrics_servers: + startup_strategy: + - grpc + - readiness + full_shutdown_duration: 600s + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + key: /path/to/key +exporter: + concurrency: 1 + kvs_background_sync_interval: 5s + kvs_background_compaction_interval: 5s + index_path: "/var/export/index" + gateway: + addrs: + - vald-lb-gateway.default.svc.cluster.local:8081 + health_check_duration: "1s" + connection_pool: + enable_dns_resolver: true + enable_rebalance: true + old_conn_close_duration: 2m + rebalance_duration: 30m + size: 3 + backoff: + backoff_factor: 1.1 + backoff_time_limit: 5s + enable_error_log: true + initial_duration: 5ms + jitter_limit: 100ms + maximum_duration: 5s + retry_count: 100 + 
circuit_breaker: + closed_error_rate: 0.7 + closed_refresh_timeout: 10s + half_open_error_rate: 0.5 + min_samples: 1000 + open_timeout: 1s + call_option: + content_subtype: "" + max_recv_msg_size: 0 + max_retry_rpc_buffer_size: 0 + max_send_msg_size: 0 + wait_for_ready: true + dial_option: + authority: "" + backoff_base_delay: 1s + backoff_jitter: 0.2 + backoff_max_delay: 120s + backoff_multiplier: 1.6 + disable_retry: false + enable_backoff: false + idle_timeout: 1h + initial_connection_window_size: 2097152 + initial_window_size: 1048576 + insecure: true + interceptors: [] + keepalive: + permit_without_stream: false + time: "" + timeout: 30s + max_call_attempts: 0 + max_header_list_size: 0 + max_msg_size: 0 + min_connection_timeout: 20s + net: + dialer: + dual_stack_enabled: true + keepalive: "" + timeout: "" + dns: + cache_enabled: true + cache_expiration: 1h + refresh_duration: 30m + socket_option: + ip_recover_destination_addr: false + ip_transparent: false + reuse_addr: true + reuse_port: true + tcp_cork: false + tcp_defer_accept: false + tcp_fast_open: false + tcp_no_delay: false + tcp_quick_ack: false + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + insecure_skip_verify: false + key: /path/to/key + read_buffer_size: 0 + shared_write_buffer: false + timeout: "" + user_agent: Vald-gRPC + write_buffer_size: 0 + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + insecure_skip_verify: false + key: /path/to/key +observability: + enabled: false + otlp: + collector_endpoint: "otel-collector.monitoring.svc.cluster.local:4317" + trace_batch_timeout: "1s" + trace_export_timeout: "1m" + trace_max_export_batch_size: 1024 + trace_max_queue_size: 256 + metrics_export_interval: "1s" + metrics_export_timeout: "1m" + attribute: + namespace: "_MY_POD_NAMESPACE_" + pod_name: "_MY_POD_NAME_" + node_name: "_MY_NODE_NAME_" + service_name: "vald-index-exportation" + metrics: + enable_cgo: true + enable_goroutine: true + enable_memory: true +
enable_version_info: true + version_info_labels: + - vald_version + - server_name + - git_commit + - build_time + - go_version + - go_os + - go_arch + - algorithm_info + trace: + enabled: true diff --git a/dockers/index/job/exportation/Dockerfile b/dockers/index/job/exportation/Dockerfile index e31d6c41d9..495f985dab 100644 --- a/dockers/index/job/exportation/Dockerfile +++ b/dockers/index/job/exportation/Dockerfile @@ -82,6 +82,7 @@ RUN --mount=type=bind,target=.,rw \ FROM gcr.io/distroless/static:nonroot LABEL maintainer="vdaas.org vald team " COPY --from=builder /usr/bin/index-exportation /usr/bin/index-exportation +COPY cmd/index/job/exportation/sample.yaml /etc/server/config.yaml # skipcq: DOK-DL3002 USER nonroot:nonroot ENTRYPOINT ["/usr/bin/index-exportation"] \ No newline at end of file diff --git a/internal/config/index_exporter.go b/internal/config/index_exporter.go new file mode 100644 index 0000000000..f2f9e3303f --- /dev/null +++ b/internal/config/index_exporter.go @@ -0,0 +1,43 @@ +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package config + +// IndexExporter represents the configurations for index exportation. +type IndexExporter struct { + // Concurrency represents indexing concurrency. 
+ Concurrency int `json:"concurrency" yaml:"concurrency"` + + // KVSBackgroundSyncInterval represents interval for checked id list kvs sync duration + KVSBackgroundSyncInterval string `json:"kvs_background_sync_interval" yaml:"kvs_background_sync_interval"` + + // KVSBackgroundCompactionInterval represents interval for checked id list kvs compaction duration + KVSBackgroundCompactionInterval string `json:"kvs_background_compaction_interval" yaml:"kvs_background_compaction_interval"` + + // IndexPath represents the export index file path + IndexPath string `json:"index_path,omitempty" yaml:"index_path"` + + // Gateway represent gateway service configuration + Gateway *GRPCClient `json:"gateway" yaml:"gateway"` +} + +func (e *IndexExporter) Bind() *IndexExporter { + e.KVSBackgroundCompactionInterval = GetActualValue(e.KVSBackgroundCompactionInterval) + e.KVSBackgroundSyncInterval = GetActualValue(e.KVSBackgroundSyncInterval) + e.IndexPath = GetActualValue(e.IndexPath) + + if e.Gateway != nil { + e.Gateway = e.Gateway.Bind() + } + return e +} diff --git a/k8s/index/job/exportation/configmap.yaml b/k8s/index/job/exportation/configmap.yaml new file mode 100644 index 0000000000..4f781a838d --- /dev/null +++ b/k8s/index/job/exportation/configmap.yaml @@ -0,0 +1,211 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: vald-index-exportation-config + labels: + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.14 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/version: v1.7.14 + app.kubernetes.io/component: vald-index-exportation +data: + config.yaml: | + --- + version: v0.0.0 + time_zone: JST + logging: + format: raw + level: info + logger: glg + server_config: + servers: + - name: grpc + host: 0.0.0.0 + port: 8081 + grpc: + bidirectional_stream_concurrency: 20 + connection_timeout: "" + enable_admin: true + enable_channelz: true + enable_reflection: true + header_table_size: 0 + initial_conn_window_size: 2097152 + 
initial_window_size: 1048576 + interceptors: + - RecoverInterceptor + keepalive: + max_conn_age: "" + max_conn_age_grace: "" + max_conn_idle: "" + min_time: 10m + permit_without_stream: false + time: 3h + timeout: 60s + max_concurrent_streams: 0 + max_header_list_size: 0 + max_receive_message_size: 0 + max_send_message_size: 0 + num_stream_workers: 0 + read_buffer_size: 0 + shared_write_buffer: false + wait_for_handlers: true + write_buffer_size: 0 + socket_option: + ip_recover_destination_addr: false + ip_transparent: false + reuse_addr: true + reuse_port: true + tcp_cork: false + tcp_defer_accept: false + tcp_fast_open: false + tcp_no_delay: false + tcp_quick_ack: false + socket_path: "" + mode: GRPC + health_check_servers: + - name: readiness + host: 0.0.0.0 + port: 3001 + http: + handler_timeout: "" + idle_timeout: "" + read_header_timeout: "" + read_timeout: "" + shutdown_duration: 0s + write_timeout: "" + mode: "" + probe_wait_time: 3s + metrics_servers: + startup_strategy: + - grpc + - readiness + full_shutdown_duration: 600s + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + key: /path/to/key + exporter: + concurrency: 20 + kvs_background_sync_interval: 5s + kvs_background_compaction_interval: 5s + index_path: "/data/export/index" + gateway: + addrs: + - vald-lb-gateway.default.svc.cluster.local:8081 + health_check_duration: "1s" + connection_pool: + enable_dns_resolver: true + enable_rebalance: true + old_conn_close_duration: 2m + rebalance_duration: 30m + size: 3 + backoff: + backoff_factor: 1.1 + backoff_time_limit: 5s + enable_error_log: true + initial_duration: 5ms + jitter_limit: 100ms + maximum_duration: 5s + retry_count: 100 + circuit_breaker: + closed_error_rate: 0.7 + closed_refresh_timeout: 10s + half_open_error_rate: 0.5 + min_samples: 1000 + open_timeout: 1s + call_option: + content_subtype: "" + max_recv_msg_size: 0 + max_retry_rpc_buffer_size: 0 + max_send_msg_size: 0 + wait_for_ready: true + dial_option: + authority: "" + 
backoff_base_delay: 1s + backoff_jitter: 0.2 + backoff_max_delay: 120s + backoff_multiplier: 1.6 + disable_retry: false + enable_backoff: false + idle_timeout: 1h + initial_connection_window_size: 2097152 + initial_window_size: 1048576 + insecure: true + interceptors: [] + keepalive: + permit_without_stream: false + time: "" + timeout: 30s + max_call_attempts: 0 + max_header_list_size: 0 + max_msg_size: 0 + min_connection_timeout: 20s + net: + dialer: + dual_stack_enabled: true + keepalive: "" + timeout: "" + dns: + cache_enabled: true + cache_expiration: 1h + refresh_duration: 30m + socket_option: + ip_recover_destination_addr: false + ip_transparent: false + reuse_addr: true + reuse_port: true + tcp_cork: false + tcp_defer_accept: false + tcp_fast_open: false + tcp_no_delay: false + tcp_quick_ack: false + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + insecure_skip_verify: false + key: /path/to/key + read_buffer_size: 0 + shared_write_buffer: false + timeout: "" + user_agent: Vald-gRPC + write_buffer_size: 0 + tls: + ca: /path/to/ca + cert: /path/to/cert + enabled: false + insecure_skip_verify: false + key: /path/to/key + observability: + enabled: false + otlp: + collector_endpoint: "otel-collector.monitoring.svc.cluster.local:4317" + trace_batch_timeout: "1s" + trace_export_timeout: "1m" + trace_max_export_batch_size: 1024 + trace_max_queue_size: 256 + metrics_export_interval: "1s" + metrics_export_timeout: "1m" + attribute: + namespace: "_MY_POD_NAMESPACE_" + pod_name: "_MY_POD_NAME_" + node_name: "_MY_NODE_NAME_" + service_name: "vald-index-exportation" + metrics: + enable_cgo: true + enable_goroutine: true + enable_memory: true + enable_version_info: true + version_info_labels: + - vald_version + - server_name + - git_commit + - build_time + - go_version + - go_os + - go_arch + - algorithm_info + trace: + enabled: true diff --git a/k8s/index/job/exportation/cronjob.yaml b/k8s/index/job/exportation/cronjob.yaml new file mode 100644 index
0000000000..60768bbf77 --- /dev/null +++ b/k8s/index/job/exportation/cronjob.yaml @@ -0,0 +1,151 @@ +# +# Copyright (C) 2019-2024 vdaas.org vald team +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +apiVersion: batch/v1 +kind: CronJob +metadata: + name: vald-index-exportation + labels: + app: vald-index-exportation + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.14 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/component: vald-index-exportation + app.kubernetes.io/version: v1.7.14 +spec: + schedule: "* * * * *" + concurrencyPolicy: Forbid + suspend: true + startingDeadlineSeconds: 43200 + jobTemplate: + spec: + ttlSecondsAfterFinished: 86400 + template: + metadata: + labels: + app: vald-index-exportation + app.kubernetes.io/name: vald + helm.sh/chart: vald-v1.7.14 + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/instance: release-name + app.kubernetes.io/component: vald-index-exportation + app.kubernetes.io/version: v1.7.14 + annotations: + pyroscope.io/scrape: "true" + pyroscope.io/application-name: vald-index-exportation + pyroscope.io/profile-cpu-enabled: "true" + pyroscope.io/profile-mem-enabled: "true" + pyroscope.io/port: "6060" + spec: + initContainers: + - name: wait-for-agent + image: busybox:stable + imagePullPolicy: Always + command: + - /bin/sh + - -e + - -c + - | + until [ "$(wget --server-response --spider --quiet http://vald-agent.default.svc.cluster.local:3001/readiness 2>&1 | 
awk 'NR==1{print $2}')" == "200" ]; do + echo "waiting for agent to be ready..." + sleep 2; + done + - name: wait-for-discoverer + image: busybox:stable + imagePullPolicy: Always + command: + - /bin/sh + - -e + - -c + - | + until [ "$(wget --server-response --spider --quiet http://vald-discoverer.default.svc.cluster.local:3001/readiness 2>&1 | awk 'NR==1{print $2}')" == "200" ]; do + echo "waiting for discoverer to be ready..." + sleep 2; + done + containers: + - name: vald-index-exportation + image: "vdaas/vald-index-exportation:latest" + imagePullPolicy: Never + volumeMounts: + - name: vald-index-exportation-config + mountPath: /etc/server/ + - name: export-pvc + mountPath: /data + livenessProbe: + failureThreshold: 2 + httpGet: + path: /liveness + port: liveness + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 3 + successThreshold: 1 + timeoutSeconds: 2 + readinessProbe: + failureThreshold: 2 + httpGet: + path: /readiness + port: readiness + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 3 + successThreshold: 1 + timeoutSeconds: 2 + startupProbe: + failureThreshold: 30 + httpGet: + path: /liveness + port: liveness + scheme: HTTP + initialDelaySeconds: 5 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 2 + ports: + - name: liveness + protocol: TCP + containerPort: 3000 + - name: readiness + protocol: TCP + containerPort: 3001 + - name: grpc + protocol: TCP + containerPort: 8081 + - name: pprof + protocol: TCP + containerPort: 6060 + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + restartPolicy: OnFailure + volumes: + - name: vald-index-exportation-config + configMap: + defaultMode: 420 + name: vald-index-exportation-config + - name: export-pvc + persistentVolumeClaim: + claimName: export-pvc diff --git a/k8s/index/job/exportation/pvc.yaml 
b/k8s/index/job/exportation/pvc.yaml new file mode 100644 index 0000000000..474aebc6de --- /dev/null +++ b/k8s/index/job/exportation/pvc.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: export-pvc + namespace: default +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + storageClassName: local-path diff --git a/pkg/index/job/exportation/config/config.go b/pkg/index/job/exportation/config/config.go new file mode 100644 index 0000000000..00c864822e --- /dev/null +++ b/pkg/index/job/exportation/config/config.go @@ -0,0 +1,66 @@ +// Copyright (C) 2019-2024 vdaas.org vald team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// You may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package config + +import ( + "github.com/vdaas/vald/internal/config" + "github.com/vdaas/vald/internal/errors" +) + +// GlobalConfig is a type alias of config.GlobalConfig representing application base configurations. +type GlobalConfig = config.GlobalConfig + +// Data represents the application configurations. +type Data struct { + // GlobalConfig represents application base configurations. + config.GlobalConfig `json:",inline" yaml:",inline"` + + // Server represent all server configurations + Server *config.Servers `json:"server_config" yaml:"server_config"` + + // Observability represents observability configurations. + Observability *config.Observability `json:"observability" yaml:"observability"` + + // Exporter represents auto indexing service configurations. 
+	Exporter *config.IndexExporter `json:"exporter" yaml:"exporter"`
+}
+
+// NewConfig loads, binds, and validates the configuration stored at path.
+func NewConfig(path string) (cfg *Data, err error) {
+	cfg = new(Data)
+
+	if err = config.Read(path, &cfg); err != nil {
+		return nil, err
+	}
+
+	if cfg != nil {
+		cfg.Bind()
+	} else {
+		return nil, errors.ErrInvalidConfig
+	}
+
+	if cfg.Observability != nil {
+		_ = cfg.Observability.Bind()
+	} else {
+		cfg.Observability = new(config.Observability).Bind()
+	}
+
+	if cfg.Exporter != nil {
+		cfg.Exporter = cfg.Exporter.Bind()
+	} else {
+		return nil, errors.ErrInvalidConfig
+	}
+
+	return cfg, nil
+}
diff --git a/pkg/index/job/exportation/service/exporter.go b/pkg/index/job/exportation/service/exporter.go
new file mode 100644
index 0000000000..dcd919d03e
--- /dev/null
+++ b/pkg/index/job/exportation/service/exporter.go
@@ -0,0 +1,269 @@
+// Copyright (C) 2019-2024 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package service
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"reflect"
+	"strconv"
+	"time"
+
+	"github.com/vdaas/vald/apis/grpc/v1/payload"
+	"github.com/vdaas/vald/apis/grpc/v1/vald"
+	vc "github.com/vdaas/vald/internal/client/v1/client/vald"
+	"github.com/vdaas/vald/internal/db/kvs/pogreb"
+	"github.com/vdaas/vald/internal/errors"
+	"github.com/vdaas/vald/internal/file"
+	"github.com/vdaas/vald/internal/log"
+	igrpc "github.com/vdaas/vald/internal/net/grpc"
+	"github.com/vdaas/vald/internal/observability/trace"
+	"github.com/vdaas/vald/internal/safety"
+	"github.com/vdaas/vald/internal/sync/errgroup"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials/insecure"
+)
+
+const (
+	apiName        = "vald/index/job/export"
+	grpcMethodName = "vald.v1.StreamListObject/" + vald.StreamListObjectRPCName
+)
+
+// Exporter represents an interface for exporting.
+type Exporter interface {
+	// StartClient starts the gateway client and returns a channel relaying its errors.
+	StartClient(ctx context.Context) (<-chan error, error)
+	// Start runs one export pass and returns when it has finished or failed.
+	Start(ctx context.Context) error
+}
+
+// export implements Exporter and keeps the exported vectors in a pogreb KVS.
+type export struct {
+	eg           errgroup.Group
+	gateway      vc.Client
+	storedVector pogreb.DB // marshaled vectors keyed by vector ID
+
+	streamListConcurrency        int
+	backgroundSyncInterval       time.Duration
+	backgroundCompactionInterval time.Duration
+	indexPath                    string // directory in which the KVS file is created
+}
+
+// New returns Exporter object if no error occurs.
+//
+// It applies the default options followed by opts, creates the index directory,
+// and opens a new KVS file named after the current Unix time inside it.
+func New(opts ...Option) (Exporter, error) {
+	e := new(export)
+	for _, opt := range append(defaultOpts, opts...) {
+		if err := opt(e); err != nil {
+			oerr := errors.ErrOptionFailed(err, reflect.ValueOf(opt))
+			// Only a critical option error aborts construction; others are logged and skipped.
+			ce := &errors.ErrCriticalOption{}
+			if errors.As(oerr, &ce) {
+				log.Error(err)
+				return nil, oerr
+			}
+			log.Warn(oerr)
+		}
+	}
+	// The gateway has no default option, so fail here instead of on a nil dereference later.
+	if e.gateway == nil {
+		return nil, errors.NewErrCriticalOption("gateway", e.gateway)
+	}
+
+	if err := file.MkdirAll(e.indexPath, os.ModePerm); err != nil {
+		log.Errorf("failed to create dir %s", e.indexPath)
+		return nil, err
+	}
+	// Todo: Determine file name
+	path := file.Join(e.indexPath, fmt.Sprintf("%s.db", strconv.FormatInt(time.Now().Unix(), 10)))
+	db, err := pogreb.New(pogreb.WithPath(path),
+		pogreb.WithBackgroundCompactionInterval(e.backgroundCompactionInterval),
+		pogreb.WithBackgroundSyncInterval(e.backgroundSyncInterval))
+	if err != nil {
+		log.Errorf("failed to open export kvs DB %s", path)
+		return nil, err
+	}
+	e.storedVector = db
+	return e, nil
+}
+
+// StartClient starts the gRPC client.
+//
+// The returned channel relays the non-nil errors received from the gateway
+// client and is closed when the relaying goroutine exits.
+func (e *export) StartClient(ctx context.Context) (<-chan error, error) {
+	ech := make(chan error, 1)
+	gch, err := e.gateway.Start(ctx)
+	if err != nil {
+		return nil, err
+	}
+	e.eg.Go(safety.RecoverFunc(func() (err error) {
+		defer close(ech)
+		for {
+			select {
+			case <-ctx.Done():
+				return ctx.Err()
+			case err = <-gch:
+			}
+			if err != nil {
+				select {
+				case <-ctx.Done():
+					return ctx.Err()
+				case ech <- err:
+				}
+			}
+		}
+	}))
+	return ech, nil
+}
+
+// Start exports the index once and then closes the KVS opened by New.
+// An error from closing the KVS is joined with the export error.
+func (e *export) Start(ctx context.Context) (err error) {
+	defer func() {
+		// NOTE(review): Close(false) is expected to keep the exported files on disk — confirm.
+		if cerr := e.storedVector.Close(false); cerr != nil {
+			err = errors.Join(err, cerr)
+		}
+	}()
+	return e.doExportIndex(ctx,
+		func(ctx context.Context, rc vald.ObjectClient, copts ...grpc.CallOption) (vald.Object_StreamListObjectClient, error) {
+			return rc.StreamListObject(ctx, &payload.Object_List_Request{}, copts...)
+		},
+	)
+}
+
+// doExportIndex streams all objects from the first connected gateway and stores
+// them in the KVS, one worker per received vector, limited by streamListConcurrency.
+//
+// It returns nil when the stream ended with io.EOF and every worker succeeded.
+// Otherwise it returns what stopped the export joined with the workers' error.
+//
+// NOTE(review): fn is not used; the stream is opened via StreamListObject directly.
+func (e *export) doExportIndex(
+	ctx context.Context,
+	fn func(ctx context.Context, rc vald.ObjectClient, copts ...grpc.CallOption) (vald.Object_StreamListObjectClient, error),
+) (errs error) {
+	ctx, span := trace.StartSpan(igrpc.WrapGRPCMethod(ctx, grpcMethodName), apiName+"/service/index.doExportIndex")
+	defer func() {
+		if span != nil {
+			span.End()
+		}
+	}()
+
+	gatewayAddrs := e.gateway.GRPCClient().ConnectedAddrs()
+	if len(gatewayAddrs) == 0 {
+		// gatewayAddrs[0] below would panic on an empty list.
+		return errors.New("active gateway is not found")
+	}
+
+	conn, err := grpc.NewClient(gatewayAddrs[0], grpc.WithTransportCredentials(insecure.NewCredentials()))
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if cerr := conn.Close(); cerr != nil {
+			log.Errorf("failed to close gateway connection: %v", cerr)
+		}
+	}()
+
+	eg, egctx := errgroup.WithContext(ctx)
+	eg.SetLimit(e.streamListConcurrency)
+	ctx, cancel := context.WithCancelCause(egctx)
+	// Releases the context on the early returns below; a no-op once a cause has been set.
+	defer cancel(nil)
+
+	stream, err := vc.NewValdClient(conn).StreamListObject(ctx, new(payload.Object_List_Request), grpc.WaitForReady(true))
+	if err != nil {
+		return err
+	}
+	if stream == nil {
+		return errors.New("gateway returned nil stream for StreamListObject")
+	}
+
+	for {
+		select {
+		case <-ctx.Done():
+			// Reached after cancel(cause) below, a failed worker, or a cancelled parent context.
+			cause := context.Cause(ctx)
+			if errors.Is(cause, io.EOF) {
+				cause = nil // the stream was read to its end
+			} else {
+				log.Errorf("context canceled due to %v", cause)
+			}
+			// Wait for the workers that are still storing vectors.
+			if err := eg.Wait(); err != nil {
+				log.Errorf("exporter returned error status errgroup returned error: %v", err)
+				return errors.Join(cause, err)
+			}
+			if cause == nil {
+				log.Infof("exporter finished")
+			}
+			return cause
+		default:
+			res, err := stream.Recv()
+			if err != nil {
+				if errors.Is(err, io.EOF) {
+					cancel(io.EOF)
+				} else {
+					cancel(errors.ErrStreamListObjectStreamFinishedUnexpectedly(err))
+				}
+				continue
+			}
+			if res == nil || res.GetVector() == nil || res.GetVector().GetId() == "" {
+				continue
+			}
+			objVec := res.GetVector()
+			eg.Go(safety.RecoverFunc(func() error {
+				return e.storeIfNewer(objVec)
+			}))
+		}
+	}
+}
+
+// storeIfNewer writes objVec to the KVS under its ID unless the KVS already
+// holds an entry for that ID whose timestamp is equal or newer.
+//
+// NOTE(review): Get and Set are separate calls, so two workers handling the
+// same ID concurrently may interleave — confirm whether duplicate IDs can occur.
+func (e *export) storeIfNewer(objVec *payload.Object_Vector) error {
+	log.Infof("received object vector id: %s, timestamp: %d", objVec.GetId(), objVec.GetTimestamp())
+
+	storedBinary, ok, err := e.storedVector.Get(objVec.GetId())
+	if err != nil {
+		log.Errorf("failed to perform Get from stored vector kvs: %v", err)
+		return err
+	}
+	if ok {
+		var storedObjVec payload.Object_Vector
+		if err := storedObjVec.UnmarshalVT(storedBinary); err != nil {
+			log.Errorf("failed to Unmarshal proto to payload.Object_Vector: %v", err)
+			return err
+		}
+		if storedObjVec.GetTimestamp() >= objVec.GetTimestamp() {
+			return nil
+		}
+	}
+
+	dAtA, err := objVec.MarshalVT()
+	if err != nil {
+		return err
+	}
+	// Propagate write failures so a partial export is not reported as complete.
+	return e.storedVector.Set(objVec.GetId(), dAtA)
+}
diff --git a/pkg/index/job/exportation/service/options.go b/pkg/index/job/exportation/service/options.go
new file mode 100644
index 0000000000..6d4a5889af
--- /dev/null
+++ b/pkg/index/job/exportation/service/options.go
@@ -0,0 +1,104 @@
+// Copyright (C) 2019-2024 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this
file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package service
+
+import (
+	"github.com/vdaas/vald/internal/client/v1/client/vald"
+	"github.com/vdaas/vald/internal/errors"
+	"github.com/vdaas/vald/internal/sync/errgroup"
+	"github.com/vdaas/vald/internal/timeutil"
+)
+
+// Option is a functional option that configures an export instance.
+type Option func(_ *export) error
+
+// defaultOpts holds the options New applies before the caller-supplied ones.
+var defaultOpts = []Option{
+	WithStreamListConcurrency(200),
+	WithKVSSyncInterval("5s"),
+	WithKVSCompactionInterval("5s"),
+	WithIndexPath("/var/export/index"),
+	WithErrGroup(errgroup.Get()),
+}
+
+// WithStreamListConcurrency returns Option that sets streamListConcurrency; num must be positive.
+func WithStreamListConcurrency(num int) Option {
+	return func(e *export) error {
+		if num > 0 {
+			e.streamListConcurrency = num
+			return nil
+		}
+		return errors.NewErrInvalidOption("streamListConcurrency", num)
+	}
+}
+
+// WithKVSSyncInterval returns Option that sets the background file sync interval; "" keeps the current value.
+func WithKVSSyncInterval(dur string) Option {
+	return func(e *export) error {
+		if dur == "" {
+			return nil
+		}
+		interval, err := timeutil.Parse(dur)
+		if err == nil {
+			e.backgroundSyncInterval = interval
+		}
+		return err
+	}
+}
+
+// WithKVSCompactionInterval returns Option that sets the background file compaction interval; "" keeps the current value.
+func WithKVSCompactionInterval(dur string) Option {
+	return func(e *export) error {
+		if dur == "" {
+			return nil
+		}
+		interval, err := timeutil.Parse(dur)
+		if err == nil {
+			e.backgroundCompactionInterval = interval
+		}
+		return err
+	}
+}
+
+// WithIndexPath returns Option that sets indexPath; an empty path is rejected as an invalid option.
+func WithIndexPath(path string) Option {
+	return func(e *export) error {
+		if path != "" {
+			e.indexPath = path
+			return nil
+		}
+		return errors.NewErrInvalidOption("indexPath", path)
+	}
+}
+
+// WithGateway returns Option that sets the gateway client; a nil client is a critical option error.
+func WithGateway(client vald.Client) Option {
+	return func(e *export) error {
+		if client != nil {
+			e.gateway = client
+			return nil
+		}
+		return errors.NewErrCriticalOption("gateway", client)
+	}
+}
+
+// WithErrGroup returns Option that sets the errgroup; a nil eg leaves the current one in place.
+func WithErrGroup(eg errgroup.Group) Option {
+	return func(e *export) error {
+		if eg != nil {
+			e.eg = eg
+		}
+		return nil
+	}
+}
diff --git a/pkg/index/job/exportation/usecase/exportation.go b/pkg/index/job/exportation/usecase/exportation.go
new file mode 100644
index 0000000000..c244bea159
--- /dev/null
+++ b/pkg/index/job/exportation/usecase/exportation.go
@@ -0,0 +1,183 @@
+// Copyright (C) 2019-2024 vdaas.org vald team
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// You may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package usecase + +import ( + "context" + "os" + "syscall" + + "github.com/vdaas/vald/internal/client/v1/client/vald" + "github.com/vdaas/vald/internal/errors" + "github.com/vdaas/vald/internal/log" + "github.com/vdaas/vald/internal/net/grpc" + "github.com/vdaas/vald/internal/observability" + "github.com/vdaas/vald/internal/runner" + "github.com/vdaas/vald/internal/safety" + "github.com/vdaas/vald/internal/servers/starter" + "github.com/vdaas/vald/internal/sync/errgroup" + "github.com/vdaas/vald/pkg/index/job/exportation/config" + "github.com/vdaas/vald/pkg/index/job/exportation/service" +) + +type run struct { + eg errgroup.Group + cfg *config.Data + observability observability.Observability + server starter.Server + exporter service.Exporter +} + +// New returns Runner instance. +func New(cfg *config.Data) (_ runner.Runner, err error) { + eg := errgroup.Get() + + gOpts, err := cfg.Exporter.Gateway.Opts() + if err != nil { + return nil, err + } + // skipcq: CRT-D0001 + gOpts = append(gOpts, grpc.WithErrGroup(eg)) + + gateway, err := vald.New(vald.WithClient(grpc.New(gOpts...))) + if err != nil { + return nil, err + } + + exporter, err := service.New( + service.WithStreamListConcurrency(cfg.Exporter.Concurrency), + service.WithIndexPath(cfg.Exporter.IndexPath), + service.WithGateway(gateway), + ) + if err != nil { + return nil, err + } + + var obs observability.Observability + if cfg.Observability.Enabled { + obs, err = observability.NewWithConfig( + cfg.Observability, + ) + if err != nil { + return nil, err + } + } + + // grpcServerOptions := []server.Option{ + // server.WithGRPCOption( + // grpc.ChainUnaryInterceptor(recover.RecoverInterceptor()), + // grpc.ChainStreamInterceptor(recover.RecoverStreamInterceptor()), + // ), + // } + + // // For health check and metrics + // srv, err := starter.New(starter.WithConfig(cfg.Server), + // starter.WithGRPC(func(_ *iconf.Server) []server.Option { + // return grpcServerOptions + // }), + // ) + // if err != nil { + // 
return nil, err + // } + + return &run{ + eg: eg, + cfg: cfg, + observability: obs, + // server: srv, + exporter: exporter, + }, nil +} + +// PreStart is a method called before execution of Start, and it invokes the PreStart method of observability. +func (r *run) PreStart(ctx context.Context) error { + if r.observability != nil { + return r.observability.PreStart(ctx) + } + return nil +} + +// Start is a method used to initiate an operation in the run, and it returns a channel for receiving errors +// during the operation and an error representing any initialization errors. +func (r *run) Start(ctx context.Context) (<-chan error, error) { + ech := make(chan error, 3) + var sech, oech, cech <-chan error + if r.observability != nil { + oech = r.observability.Start(ctx) + } + // sech = r.server.ListenAndServe(ctx) + cech, err := r.exporter.StartClient(ctx) + if err != nil { + close(ech) + return nil, err + } + + r.eg.Go(safety.RecoverFunc(func() (err error) { + defer func() { + p, err := os.FindProcess(os.Getpid()) + if err != nil { + // using Fatal to avoid this process to be zombie + // skipcq: RVV-A0003 + log.Fatalf("failed to find my pid to kill %v", err) + return + } + log.Info("sending SIGTERM to myself to stop this job") + if err := p.Signal(syscall.SIGTERM); err != nil { + log.Error(err) + } + }() + return r.exporter.Start(ctx) + })) + + r.eg.Go(safety.RecoverFunc(func() (err error) { + defer close(ech) + for { + select { + case <-ctx.Done(): + return ctx.Err() + case err = <-oech: + case err = <-sech: + case err = <-cech: + } + if err != nil { + select { + case <-ctx.Done(): + return errors.Join(ctx.Err(), err) + case ech <- err: + } + } + } + })) + return ech, nil +} + +// PreStop is a method called before execution of Stop. +func (*run) PreStop(_ context.Context) error { + return nil +} + +// Stop is a method used to stop an operation in the run. 
+func (r *run) Stop(ctx context.Context) (errs error) { + if r.observability != nil { + if err := r.observability.Stop(ctx); err != nil { + errs = errors.Join(errs, err) + } + } + return errs +} + +// PostStop is a method called after execution of Stop. +func (*run) PostStop(_ context.Context) error { + return nil +}