diff --git a/docs/API/README.md b/docs/API/README.md new file mode 100644 index 000000000..cf0e4017a --- /dev/null +++ b/docs/API/README.md @@ -0,0 +1,113 @@ +# API Mock Tests +The [Swagger](swagger.yaml) describes the API used by the Integration repository to support the `catalog`, `registry` and `store` functionalities. + +### Setup mock server +In order to test the API, we can utilize a swagger based mock library which allows ['contract based testing'](https://github.com/stoplightio/prism). + +#### running the swagger mock server +```` +npm install -g @stoplight/prism-cli +```` +Once this tool is installed, the server can be run using the next command +``` +prism mock swagger.yaml +``` +The next endpoints are presented: +``` +[10:32:12 a.m.] › [CLI] ℹ info GET http://127.0.0.1:4010/catalog?limit=41 +[10:32:12 a.m.] › [CLI] ℹ info POST http://127.0.0.1:4010/catalog +[10:32:12 a.m.] › [CLI] ℹ info GET http://127.0.0.1:4010/catalog/sed +[10:32:12 a.m.] › [CLI] ℹ info GET http://127.0.0.1:4010/repository?limit=%5Bobject%20Object%5D +[10:32:12 a.m.] › [CLI] ℹ info POST http://127.0.0.1:4010/repository +[10:32:12 a.m.] › [CLI] ℹ info GET http://127.0.0.1:4010/repository/%5Bobject%20Object%5D +[10:32:12 a.m.] › [CLI] ℹ info GET http://127.0.0.1:4010/store?limit=25 +[10:32:12 a.m.] › [CLI] ℹ info POST http://127.0.0.1:4010/store +[10:32:12 a.m.] › [CLI] ℹ info GET http://127.0.0.1:4010/store/cum +[10:32:12 a.m.] › [CLI] ℹ info POST http://127.0.0.1:4010/store/nihil/validate +[10:32:12 a.m.] › [CLI] ℹ info POST http://127.0.0.1:4010/store/laudantium/upload +[10:32:12 a.m.] › [CLI] ℹ info PUT http://127.0.0.1:4010/store/nihil/activate +[10:32:12 a.m.] › [CLI] ▶ start Prism is listening on http://127.0.0.1:4010 +``` + +Once the server has started, a CURL request can be initiated with any of the above endpoints: + +`GET http://localhost:4010/catalog` + +Would show in the traffic logs: + +`[10:32:23 a.m.] 
› [HTTP SERVER] get /catalog ℹ info Request received` + +And will result with : + +```json5 + { + "catalog": "observability", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability", + "categories": [ + { + "category": "logs", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs", + "components": [ + { + "component": "log", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/logs", + "container": true, + "tags": [] + }, + { + "component": "http", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/http", + "tags": [], + "container": false + }, + { + "component": "communication", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/communication", + "tags": ["web"], + "container": false + }] + }, + { + "category": "traces", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability/traces", + "components": [ + { + "component": "span", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability/traces/traces", + "tags": [], + "container": true + }] + }, + { + "category": "metrics", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability/metrics", + "components": [ + { + "component": "metric", + "version": "1.0", + "url": "https://github.com/opensearch-project/observability/tree/2.x/schema/observability/metrics/metrics", + "tags": [], + "container": true + }] + } + ] +} +``` + + + +#### running the swagger mock server as docker + +One-liner docker command (assuming running from repository root): + +```text +docker run -p 9200:4010 -v "$(pwd)/docs/API:/api" -v "$(pwd)/schema:/schema" --name integ-prism -t stoplight/prism mock -h 0.0.0.0 /api/swagger.yaml +``` \ No newline at end of file diff --git a/docs/API/swagger.yaml b/docs/API/swagger.yaml new file mode 100644 index 000000000..34d5f4c01 --- /dev/null +++ b/docs/API/swagger.yaml @@ -0,0 +1,1056 @@ +openapi: 3.0.0 +info: + description: This is the Integration & Catalog API + version: "1.0.0" + title: Integration API + license: + name: Apache 2.0 + url: 'http://www.apache.org/licenses/LICENSE-2.0.html' +tags: + - name: admins + description: Secured Admin-only calls + - name: developers + description: Operations available to regular developers +paths: + /catalog: + get: + tags: + - developers + summary: schema list of loaded catalogs + operationId: searchCatalog + description: | + By passing in the appropriate search attributes, you can search for + available catalogs in the system + parameters: + - in: query + name: searchString + description: pass an optional search string for looking up catalog components + required: false + schema: + type: string + - in: query + name: limit + description: maximum number of records to return + schema: + type: integer + format: int32 + minimum: 0 + maximum: 50 + responses: + '200': + description: search results matching criteria + content: + application/json: + schema: + type: array + items: + $ref: '../../schema/system/catalog.schema' + example: + observability: + application/json: + schema: + type: array + items: + $ref: '../../schema/system/catalog.schema' + examples: + catalog: observability + version: '1.0' + url: 
https://github.com/opensearch-project/observability/tree/2.x/schema/observability + categories: + - category: logs + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs + components: + - component: log + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/logs + container: true + tags: [ ] + - component: http + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/http + tags: [ ] + container: false + - component: communication + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/communication + tags: + - web + container: false + - category: traces + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/traces + components: + - component: span + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/traces/traces + tags: [ ] + container: true + - category: metrics + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/metrics + components: + - component: metric + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/metrics/metrics + tags: [ ] + container: true + '400': + description: bad input parameter + post: + tags: + - admins + summary: adds a catalog item + operationId: addCatalog + description: | + Adds a catalog item to the system, expecting the internal URLs to be accessible + requestBody: + description: Inventory item to add + required: true + content: + application/json: + schema: + $ref: '../../schema/system/catalog.schema' + example: + observability: + application/json: + schema: + type: array + items: + $ref: '../../schema/system/catalog.schema' + examples: + catalog: observability + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability + categories: + - category: logs + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs + components: + - component: log + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/logs + container: true + tags: [ ] + - component: http + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/http + tags: [ ] + container: false + - component: communication + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/communication + tags: + - web + container: false + - category: traces + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/traces + components: + - component: span + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/traces/traces + tags: [ ] + container: true + - category: metrics + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/metrics + components: + - component: metric + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/metrics/metrics + tags: [ ] + container: true + + responses: + '201': + description: item created + '400': + description: 'invalid input, object invalid' + '409': + description: an existing item already exists + 
/catalog/{id}: + get: + tags: + - developers + summary: get catalog by id + operationId: catalogById + description: | + Get a specific catalog by Id + parameters: + - in: path + name: id + description: catalog instance Id + required: true + schema: + type: string + responses: + '200': + description: catalog + content: + application/json: + schema: + $ref: '../../schema/system/catalog.schema' + example: + observability: + application/json: + schema: + type: array + items: + $ref: '../../schema/system/catalog.schema' + examples: + catalog: observability + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability + categories: + - category: logs + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs + components: + - component: log + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/logs + container: true + tags: [ ] + - component: http + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/http + tags: [ ] + container: false + - component: communication + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/logs/communication + tags: + - web + container: false + - category: traces + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/traces + components: + - component: span + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/traces/traces + tags: [ ] + container: true + - category: metrics + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/metrics + components: + - component: metric + version: '1.0' + url: https://github.com/opensearch-project/observability/tree/2.x/schema/observability/metrics/metrics + tags: [ ] + container: true + '400': + description: bad input parameter + /repository: + get: + tags: + - developers + summary: repository of available integrations + operationId: searchRepository + description: | + By passing in the appropriate search attributes, you can search for + available integration templates in the repository (available for loading into the store) + parameters: + - in: query + name: searchString + description: pass an optional search string for looking up available Integrations template + required: false + schema: + type: string + - in: query + name: limit + description: maximum number of records to return + schema: + type: integer + format: int32 + minimum: 0 + maximum: 50 + responses: + '200': + description: search results matching criteria + content: + application/json: + schema: + type: array + items: + $ref: '../../schema/system/integrations-template-list.schema' + example: + - template-name: nginx + version: 1.0.0 + description: Nginx HTTP server collector + catalog: observability + collections: + - logs + - metrics + - name: apache HTTP server + version: 1.0.0 + description: Http Apache server + catalog: observability + collections: + - logs + - metrics + - name: VPC Flow Logs + version: 1.0.0 + description: " Log and View Network Traffic Flows" + catalog: observability + collections: + - logs + + '400': + description: bad input parameter + post: + tags: + - admins + summary: adds an Integration template item + operationId: addIntegration + description: | + Adds an Integration template to the repository, expecting the internal URLs to be 
accessible + requestBody: + required: true + description: Inventory template item to add + content: + application/zip: + schema: + $ref: '../../schema/system/integration.schema' + example: + - BLOB including all images, json files, dashboards and documentations + responses: + '201': + description: item created + '400': + description: 'invalid input, object invalid' + '409': + description: an existing item already exists + /repository/{id}: + get: + tags: + - developers + summary: get integration template from the repository by id + operationId: integrationFromRepositoryById + description: | + Get a specific Integration from the repository by Id + parameters: + - in: path + name: id + description: integration name + required: true + schema: + type: string + responses: + '200': + description: Integration template + content: + application/json: + schema: + $ref: '../../schema/system/integration.schema' + example: + template-name: nginx + version: + integration: 0.1.0 + schema: 1.0.0 + resource: "^1.23.0" + description: Nginx HTTP server collector + identification: instrumentationScope.attributes.identification + catalog: observability + components: + - web + - http + collection: + - logs: + - info: access logs + input_type: logfile + dataset: nginx.access + labels: + - nginx + - access + - info: error logs + input_type: logfile + labels: + - nginx + - error + dataset: nginx.error + - metrics: + - info: status metrics + input_type: metrics + dataset: nginx.status + labels: + - nginx + - status + repository: + url: https://github.com/opensearch-project/observability/tree/main/integrarions/nginx + + '400': + description: bad input parameter + /repository/{id}/fields: + get: + tags: + - developers + summary: get integration-template catalog fields + operationId: integrationFieldsById + description: | + Get a specific Integration-template fields by Id + parameters: + - in: path + name: id + description: integration name + required: true + schema: + type: string + responses: + '200': + description: Integration's fields + content: + application/json: + schema: + $ref: '../../schema/system/integration-fields-list.schema' + example: + template-name: nginx + version: 1.0.0 + description: Nginx HTTP server collector + catalog: observability + collections: + - category: logs + components: + - source: logs.mapping + container: true + fields: + severity: + properties: + number: + type: long + text: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + attributes: + type: object + properties: + data_stream: + properties: + dataset: + ignore_above: 128 + type: keyword + namespace: + ignore_above: 128 + type: keyword + type: + ignore_above: 56 + type: keyword + body: + type: text + "@timestamp": + type: date + observedTimestamp: + type: date + traceId: + ignore_above: 256 + type: keyword + spanId: + ignore_above: 256 + type: keyword + schemaUrl: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + instrumentationScope: + properties: + name: + type: text + fields: + keyword: + type: keyword + ignore_above: 128 + version: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + dropped_attributes_count: + type: integer + schemaUrl: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + event: + properties: + domain: + ignore_above: 256 + type: keyword + name: + ignore_above: 256 + type: keyword + category: + ignore_above: 256 + type: keyword + type: + ignore_above: 256 + type: keyword + kind: + ignore_above: 256 + type: keyword + result: + 
ignore_above: 256 + type: keyword + exception: + properties: + message: + ignore_above: 1024 + type: keyword + type: + ignore_above: 256 + type: keyword + stacktrace: + type: text + - source: http.mapping + container: false + fields: + http: + properties: + flavor: + type: keyword + ignore_above: 256 + user_agent: + type: keyword + ignore_above: 2048 + url: + type: keyword + ignore_above: 2048 + schema: + type: keyword + ignore_above: 1024 + target: + type: keyword + ignore_above: 1024 + route: + type: keyword + ignore_above: 1024 + client.ip: + type: ip + resent_count: + type: integer + request: + type: object + properties: + id: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + body.content: + type: text + bytes: + type: long + method: + type: keyword + ignore_above: 256 + referrer: + type: keyword + ignore_above: 1024 + mime_type: + type: keyword + ignore_above: 1024 + response: + type: object + properties: + id: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + body.content: + type: text + bytes: + type: long + status_code: + type: integer + - source: communication.mapping + fields: + communication: + properties: + sock.family: + type: keyword + ignore_above: 256 + source: + type: object + properties: + address: + type: text + fields: + keyword: + type: keyword + ignore_above: 1024 + domain: + type: text + fields: + keyword: + type: keyword + ignore_above: 1024 + bytes: + type: long + ip: + type: ip + port: + type: long + mac: + type: keyword + ignore_above: 1024 + packets: + type: long + destination: + type: object + properties: + address: + type: text + fields: + keyword: + type: keyword + ignore_above: 1024 + domain: + type: text + fields: + keyword: + type: keyword + ignore_above: 1024 + bytes: + type: long + ip: + type: ip + port: + type: long + mac: + type: keyword + ignore_above: 1024 + packets: + type: long + - category: metrics + components: + - source: metrics.mapping + container: true + fields: + name: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + attributes: + type: object + properties: + data_stream: + properties: + dataset: + ignore_above: 128 + type: keyword + namespace: + ignore_above: 128 + type: keyword + type: + ignore_above: 56 + type: keyword + description: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + unit: + type: keyword + ignore_above: 128 + kind: + type: keyword + ignore_above: 128 + aggregationTemporality: + type: keyword + ignore_above: 128 + monotonic: + type: boolean + startTime: + type: date + "@timestamp": + type: date + observedTimestamp: + type: date_nanos + value: + properties: + int: + type: integer + double: + type: double + buckets: + properties: + count: + type: long + sum: + type: double + max: + type: float + min: + type: float + bucketCount: + type: long + bucketCountsList: + type: long + explicitBoundsList: + type: float + explicitBoundsCount: + type: float + quantiles: + properties: + quantile: + type: double + value: + type: double + quantileValuesCount: + type: long + positiveBuckets: + properties: + count: + type: long + max: + type: float + min: + type: float + negativeBuckets: + properties: + count: + type: long + max: + type: float + min: + type: float + negativeOffset: + type: integer + positiveOffset: + type: integer + zeroCount: + type: long + scale: + type: long + max: + type: float + min: + type: float + sum: + type: float + count: + type: long + exemplar: + properties: + time: + type: date + traceId: + ignore_above: 256 + type: keyword 
+ spanId: + ignore_above: 256 + type: keyword + instrumentationScope: + properties: + name: + type: text + fields: + keyword: + type: keyword + ignore_above: 128 + version: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + droppedAttributesCount: + type: integer + schemaUrl: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + schemaUrl: + type: text + fields: + keyword: + type: keyword + ignore_above: 256 + + '400': + description: bad input parameter + /store: + get: + tags: + - developers + summary: get all stored Integrations + operationId: searchStore + description: | + By passing in the appropriate search attributes, you can search for + available Integrations in the store + parameters: + - in: query + name: searchString + description: pass an optional search string for looking up Integration in the store + required: false + schema: + type: string + - in: query + name: limit + description: maximum number of records to return + schema: + type: integer + format: int32 + minimum: 0 + maximum: 50 + responses: + '200': + description: search results matching criteria + content: + application/json: + schema: + type: array + items: + $ref: '../../schema/system/integration-instance.schema' + example: + template-name: nginx + dataset: prod + namespace: us_east + id: nginx-prod-us_east + version: 0.1.0 + description: Nginx HTTP server collector for east cost prod systems + template: https://github.com/opensearch-project/observability/blob/2.x/integrations/nginx/config.json + creationDate: '2016-08-29T09:12:33.001Z' + status: LOADED + assets: + - name: sso-logs-dashboard-new.ndjson + creationDate: "'2016-08-29T09:12:33.001Z'" + status: LOADED + + '400': + description: bad input parameter + post: + tags: + - admins + summary: adds an Integration item to store + operationId: storeIntegration + description: | + Adds an Integration to the system store, expecting the internal URLs to be accessible + requestBody: + required: true + description: Inventory item to add + content: + application/json: + schema: + $ref: '../../schema/system/integration-instance.schema' + example: + template-name: nginx + dataset: prod + namespace: us_east + id: nginx-prod-us_east + version: 0.1.0 + description: Nginx HTTP server collector for east cost prod systems + template: https://github.com/opensearch-project/observability/blob/2.x/integrations/nginx/config.json + creationDate: '2016-08-29T09:12:33.001Z' + status: LOADED + assets: + - name: sso-logs-dashboard-new.ndjson + creationDate: "'2016-08-29T09:12:33.001Z'" + status: LOADED + + responses: + '201': + description: item created + '400': + description: 'invalid input, object invalid' + '409': + description: an existing item already exists + /store/{id}: + get: + tags: + - developers + summary: get stored Integration status + operationId: integrationStatus + description: | + Get the stored Integration's status + parameters: + - in: path + name: id + description: Integration instance Id + required: true + schema: + type: string + responses: + '200': + description: stored integration status + content: + application/json: + schema: + $ref: '../../schema/system/integration-instance.schema' + example: + template-name: nginx + dataset: prod + namespace: us_east + id: nginx-prod-us_east + version: 0.1.0 + description: Nginx HTTP server collector for east cost prod systems + template: https://github.com/opensearch-project/observability/blob/2.x/integrations/nginx/config.json + creationDate: '2016-08-29T09:12:33.001Z' + status: LOADED + 
assets: + - name: sso-logs-dashboard-new.ndjson + creationDate: "'2016-08-29T09:12:33.001Z'" + status: LOADED + + '400': + description: bad input parameter + /store/{id}/validate: + post: + tags: + - developers + summary: validates an Integration + operationId: integrationValidate + description: | + validate this integration including all its assets + parameters: + - in: path + name: id + description: Integration instance Id + required: true + schema: + type: string + responses: + '200': + description: integration store status + content: + application/json: + schema: + $ref: '../../schema/system/integration-instance.schema' + example: + template-name: nginx + dataset: prod + namespace: us_east + id: nginx-prod-us_east + version: 0.1.0 + description: Nginx HTTP server collector for east cost prod systems + template: https://github.com/opensearch-project/observability/blob/2.x/integrations/nginx/config.json + creationDate: '2016-08-29T09:12:33.001Z' + status: LOADED + assets: + - name: sso-logs-dashboard-new.ndjson + creationDate: "'2016-08-29T09:12:33.001Z'" + status: LOADED + + '400': + description: bad input parameter + /store/{id}/upload: + post: + tags: + - developers + summary: upload Integration's assets + operationId: integrationUploadAssets + description: | + Load a list of assets associated with this integration into + available Integrations in the system store + requestBody: + required: false + description: Inventory items to upload + content: + application/json: + schema: + $ref: '../../schema/system/integration-instance.schema' + example: + template-name: nginx + dataset: prod + namespace: us_east + id: nginx-prod-us_east + version: 0.1.0 + description: Nginx HTTP server collector for east cost prod systems + template: https://github.com/opensearch-project/observability/blob/2.x/integrations/nginx/config.json + creationDate: '2016-08-29T09:12:33.001Z' + status: LOADED + assets: + - name: sso-logs-dashboard-new.ndjson + creationDate: "'2016-08-29T09:12:33.001Z'" + status: LOADED + + parameters: + - in: path + name: id + description: Integration instance Id + required: true + schema: + type: string + responses: + '200': + description: integration store status + content: + application/json: + schema: + $ref: '../../schema/system/integration-instance.schema' + example: + template-name: nginx + dataset: prod + namespace: us_east + id: nginx-prod-us_east + version: 0.1.0 + description: Nginx HTTP server collector for east cost prod systems + template: https://github.com/opensearch-project/observability/blob/2.x/integrations/nginx/config.json + creationDate: '2016-08-29T09:12:33.001Z' + status: LOADED + assets: + - name: sso-logs-dashboard-new.ndjson + creationDate: "'2016-08-29T09:12:33.001Z'" + status: LOADED + + '400': + description: bad input parameter + /store/{id}/activate: + put: + tags: + - developers + summary: activate an Integration + operationId: integrationActivate + description: | + attempts activating an integration that may be in an intermediate phase + during the storage process + parameters: + - in: path + name: id + description: Integration instance Id + required: true + schema: + type: string + responses: + '200': + description: integration store status + content: + application/json: + schema: + $ref: '../../schema/system/integration-instance.schema' + example: + template-name: nginx + dataset: prod + namespace: us_east + id: nginx-prod-us_east + version: 0.1.0 + description: Nginx HTTP server collector for east cost prod systems + template: 
https://github.com/opensearch-project/observability/blob/2.x/integrations/nginx/config.json + creationDate: '2016-08-29T09:12:33.001Z' + status: LOADED + assets: + - name: sso-logs-dashboard-new.ndjson + creationDate: "'2016-08-29T09:12:33.001Z'" + status: LOADED + '400': + description: bad input parameter +servers: + - url: http://127.0.0.1:4010 + description: Mock server (uses fake data) \ No newline at end of file diff --git a/docs/Integration-API.md b/docs/Integration-API.md new file mode 100644 index 000000000..89bedf85d --- /dev/null +++ b/docs/Integration-API.md @@ -0,0 +1,306 @@ +# Integration API + +Integrations are a stateful bundle which will be stored inside a system `.integration` index which will reflect the integration's status during the different phases of its lifecycle. + +--- +## Integration UX Loading Lifecycle API + +### Flow Diagram +![flow-diagram](https://user-images.githubusercontent.com/48943349/222320100-cac40749-9e5a-4e90-8ff2-386958adc06d.png) + + +### [Swagger API reference](../docs/API/swagger.yaml) + +### Load Integrations Repository +As part of the Integration Ux workflow, once the Integration plugin is loaded it should load all the available integrations that are bundled in the integration repo. + +![load-integration](https://user-images.githubusercontent.com/48943349/222223963-9c740f33-e156-4541-88cf-67d70131410f.png) + + +The API needed from the backend should be as follows: +Query: +```text +GET _integration/repository?filter=category:logs&component:web +``` +**Response**: +```jsoon +{ + "name": "nginx", + "version": { + "integ": "0.1.0", + "schema": "1.0.0", + "resource": "^1.23.0" + }, + "description": "Nginx HTTP server collector", + "Information":"file:///.../schema/logs/info.html", + "identification": "instrumentationScope.attributes.identification", + "catalog": "observability", + "components": [ + "web","http" + ], + "collection":[ + { + "logs": [{ + "info": "access logs", + "input_type":"logfile", + "dataset":"nginx.access", + "labels" :["nginx","access"], + "schema": "file:///.../schema/logs/access.json" + }, + { + "info": "error logs", + "input_type":"logfile", + "labels" :["nginx","error"], + "dataset":"nginx.error", + "schema": "file:///.../schema/logs/error.json" + }] + }, + { + "metrics": [{ + "info": "status metrics", + "input_type":"metrics", + "dataset":"nginx.status", + "labels" :["nginx","status"], + "schema": "file:///.../schema/logs/status.json" + }] + } + ], + "repo": { + "github": "https://github.com/opensearch-project/observability/tree/main/integrarions/nginx" + } +} +``` +The integration object schema is supported by both B/E & F/E for display & query to the correct fields +[Integration config schema](https://github.com/opensearch-project/observability/blob/9a22f061f568443651fe38d96c864901eed12340/schema/system/integration.schema) + +--- +The backend responsibilities : +- scan the Integration folder (on-load) + - In the future this can also be loaded from a remote publish location + +- load into cache each integration config file +- allow filtering on the registry API + +The frontend responsibilities : +- enable shared info (html, images) resources in a CDN +- allow filtering for the integrations + +![show_repository](https://user-images.githubusercontent.com/48943349/222226930-1d9a684d-7f19-4aaf-b601-32bf7ce08920.png) + +In addition, the following API is also supported +``` +GET _integration/repository/$templat_name +``` +This call results in returning the cached integration config json object +It would be used for display and 
route the F/E for fetching the relevant page assets + +![view_integrations](https://user-images.githubusercontent.com/48943349/222234012-68b134aa-72b6-4c72-84b4-bd2e5be2e4e5.png) + +Once an integration was selected +![view_integration](https://user-images.githubusercontent.com/48943349/222234188-017498c6-1d09-4d1f-84ee-5b5fa30fd9a4.png) + +This page will require the F/E to fetch multiple assets from different locations +- images +- repository url +- license url +- html +- json schema objects for that integration content + +**implement B/E :** +- registry loading & cache mechanism +- registry API +- registry filter API + +**implement F/E :** +- integrations list display +- integrations filter display +- integration panel display +--- + + +### Load Integration +As part of the Integration Ux workflow, once the Integration plugin has loaded and was selected by the user for loading into the system - the B/E should initiate the loading process and display the appropriate status to reflect the loading steps... + +This phase follows the [previous step](https://github.com/opensearch-project/observability/issues/1441) in which the user has filtered the Integrations from the repository and selected a specific one to load into the system + +### Integration Load workflow +![Integration-load-flow](https://user-images.githubusercontent.com/48943349/222322253-e582b325-8b85-4edf-83ef-402abd54d837.png) + + +### Integration state machine +![Integration-state-machine](https://user-images.githubusercontent.com/48943349/222246887-2be6edc3-1c8a-433a-a154-325fec66d95b.png) + +The API needed from the backend should be as follows: + +Store API: +``` +POST _integration/store/$instance_name +``` +The $instance_name represents the specific name the integration was instanced with - for example, Nginx Integration can be a template for multiple Nginx instances +each representing different domain / aspect such as geographic. + +```jsoon +{ + "template-name": "nginx", + "version": { + "integ": "0.1.0", + "schema": "1.0.0", + "resource": "^1.23.0" + }, + "description": "Nginx HTTP server collector", + "Information":"file:///.../schema/logs/info.html", + "identification": "instrumentationScope.attributes.identification", + "catalog": "observability", + "components": [ + "web","http" + ], + "collection":[ + { + "logs": [{ + "info": "access logs", + "input_type":"logfile", + "dataset":"nginx.access", + "labels" :["nginx","access"], + "schema": "file:///.../schema/logs/access.json" + }, + { + "info": "error logs", + "input_type":"logfile", + "labels" :["nginx","error"], + "dataset":"nginx.error", + "schema": "file:///.../schema/logs/error.json" + }] + }, + { + "metrics": [{ + "info": "status metrics", + "input_type":"metrics", + "dataset":"nginx.status", + "labels" :["nginx","status"], + "schema": "file:///.../schema/logs/status.json" + }] + } + ], + "repo": { + "github": "https://github.com/opensearch-project/observability/tree/main/integrarions/nginx" + } +} +``` +During the UX interaction with the user, user can update data-stream details shown here: +![Screenshot 2023-03-01 at 11 28 00 AM](https://user-images.githubusercontent.com/48943349/222274241-f8689084-5ff5-432f-bc06-83546ac255ec.png) + +If the user keeps all the original data-stream naming convention (namespace and dataset) the next phase would be the validation of the integration prior to loading all the assets. + +### Data-Stream / index naming update +In case the user wants to update the data-stream / index naming details - he may do so using dedicated window. 
+Selection of the naming convention may also display available existing data-streams that are selectable if the user wants to choose from available ones and not creating new templates. + +Once user changes the data-stream / index pattern - this will be reflected in every asset that has this attribute. + - update the asset name (according to the `instance_name` field) + - `${instance_name}-assetName.json`, this can also be extended using more configurable patterns such as `${instance_name}-{dataset}-{namespace}-assetName.json` + - update the index template's `index_pattern` field with the added pattern + - "index_patterns":` ["sso_logs-*-*"]` -> `["sso_logs-*-*", "myLogs-*"]` + +#### Loading Integration + +The integration has the next steps: + +```text + - LOADING + - VALIDATION + - UPLOAD + - READY +``` +Each step may result on one of two result +- `ready` - this will transition it to the next step +- `maintenance` - this will hold until user fix issues + +Once the Integration instance was stored in the integration `store` index, it will have a `loading` status as displayed in the first image. + +Next the integration instance will undergo a validation phase in which + - assets will be validated with the schema to match fields to the mapping + - assets containing index patterns will be validated any index with these pattern exists + - datasource will be validated to verify connection is accessible + - mapping templates are verified to exist + +If any of the validation failed - the API (the call to `_integration/store/$instance_name` ) will return a status indicating +the current state of the integration: + +**Response**: +```json +{ + "template-name": "nginx", + "instance": "nginx-prod", + "status": "maintenance", + "issues": [ + { + "asset": "dashboard", + "name": "nginx-prod-core", + "url": "file:///.../nginx/integration/assets/nginx-prod-core.ndjson", + "issue": [ + "field cloud.version is not present in mapping sso_log-nginx-prod" + ] + } + ] +} +``` +The next screen shows the Integration assets status: + +![Integration - assets - status](img/Integration-assets-status.png) + + +The next screen shows the maintenance issues: + +![Maintaining - issue](img/maintaining-issue.png) + +Once all the issues are manually resolved by the User, the UX can continue the loading process by the next API +`PUT _integration/store/$instance_name/activate` + +This API attempts to move the state of the integration to `Ready` and returns the result status of this call. 
+ +**Response**: +```json +{ + "instance": "nginx-prod", + "integration-name": "nginx", + "status": "loading" +} +``` + +#### Load Assets + +The loading assets phase will use the existing bulk load api for all the existing assets of the integration + - Visualizations + - Dashboards + - SaveQueries + - MappingTemplates + +The User can chery pick specific assets to load and use the next UX window for this purpose + +Using the next API +`PUT _integration/store/$instance_name/load` +```json +{ + "instance": "nginx-prod", + "integration-name": "nginx", + "assets" :{ + "dashboards": ["nginx-prod-core.ndjson"], + "savedQueries": ["AvgQueriesPerDayFilter.json"] + } +} +``` + +Once these steps are completed, the status of the integration would become `Ready` and the Ux can pull this info using status API : + +`GET _integration/store/$instance_name/status` + +**Response**: +```json +{ + "instance": "nginx-prod", + "integration-name": "nginx", + "status": "ready" +} +``` + +--- diff --git a/docs/Integration-fields-mapping.md b/docs/Integration-fields-mapping.md new file mode 100644 index 000000000..14608d202 --- /dev/null +++ b/docs/Integration-fields-mapping.md @@ -0,0 +1,84 @@ +# Highlights +During the Add-Integration flow - add the ability to map custom index proprietary fields to simple schema standards fields + +## Introduction +Integrations by definition are coupled to a catalog - each input stream of data delivers a well structured event. +In the Observability case - these signals are `trace`,`logs` and `metrics`. + +The purpose of having this well structured events is to unify and correlate the information arriving from the system in a similar manner across all customers and user domains. + +For example, an integration with dashboard asset is coupled for the specific fields it uses to visualize and display information from the data-stream. +This information is assumed to be in the specific structure dictated by the catalog. + +`Observability` : Catalog +`Log` : Category +- `http`: Component + - `url` : URL field + - `client.ip` : client's IP field + - `request.body.content`: request body content field + +These fields must exist and match type in the physical index representing the data-stream. + +## Problem +Some users may already have existing indices that function as data source for these specific displays. The index mapping used by these indices may have different names that represent a similar meaning. + +We would like that the dashboards would work "out of the box" with minimal configuration from the user. +On the other hand customers will not accept changes to the index (reindexing) as part of the Integration installation. + +## Proposed solution +[Field Alias](https://opensearch.org/docs/2.4/opensearch/supported-field-types/alias/) is a solution for connecting similar purpose fields that have different names. + +This capability allows for queries and dashboards to work seamlessly without any changes to the data or index information. + +## Few basic constraints: + +- The original field must be created before the alias is created. +- The original field cannot be an object or another alias. + +For example, the field `request_url` can be connected to the `http.url` field with the next command: +``` +PUT sso_logs-nginx-demo/_mapping +{ + "properties": { + "http.url": { + "type": "alias", + "path": "request_url" + } + } +} +``` +This will allow queries / dashboards using the `http.url` field to execute correctly ... 
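+
+For example, a query that references the aliased field will transparently read the data stored in the original `request_url` field. The following PPL query is an illustrative sketch only: it reuses the `sso_logs-nginx-demo` index from the example above, and the filter value is hypothetical:
+```
+source = sso_logs-nginx-demo | where http.url = '/index.html' | stats count() by http.url
+```
+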
+We can also validate that an alias exists using the `field_caps` API
+```
+GET sso_logs-nginx-demo/_field_caps?fields=http.url
+```
+
+Returning:
+```
+{
+  "indices": [
+    "sso_logs-nginx-demo"
+  ],
+  "fields": {
+    "http.url": {
+      "text": {
+        "type": "text",
+        "searchable": true,
+        "aggregatable": false
+      }
+    }
+  }
+}
+```
+---
+
+## User Workflow
+Once an integration template is selected for instantiation, the user can select the advanced configuration option.
+This option allows the user to select an existing index for the data-stream. Once the index has been selected, a two-column mapping table is shown.
+
+The user can select the original field and the schema destination field, so that the aliasing flow can be performed.
+
+![fields-mapping](https://user-images.githubusercontent.com/48943349/225185385-a4009c65-533e-4890-a623-6ce7e12f9cc0.png)
+
+Once the user has mapped the proprietary fields to the schema standard fields, the integration instance loading phase continues; it will now also create aliases for all the fields the user has selected.
\ No newline at end of file
diff --git a/docs/Integration-loading.md b/docs/Integration-loading.md
new file mode 100644
index 000000000..25adb9f91
--- /dev/null
+++ b/docs/Integration-loading.md
@@ -0,0 +1,54 @@
+# Integration Loading
+
+**Loading Integrations into OpenSearch:**
+
+This phase describes the use case of a customer using Observability: how such a customer loads different Integrations so that they can be used to easily visualize and analyze existing data using pre-canned dashboards and visualizations.
+
+In the former part (Publishing An Integration) we defined the **OpenSearch Integration Verification Review Process.**
+
+The integrations passing this process can be available out of the box once the Observability plugin is loaded. This availability means that these Integrations can be packaged together and assembled with the Observability solution.
+
+Once Observability is distributed, it is pre-bundled with the verified Integrations. These integrations are packaged in a dedicated folder.
+
+**Integration Lifecycle**
+```
+- LOADING
+  - VALIDATION
+    - UPLOAD
+      - READY
+```
+Observability bootstrap initiates the state for all the Integrations bundled with the distribution; the initial state is
+
+***Loading*** - indicating the integration is still loading and has not yet been verified for runtime readiness.
+
+- Loading an integration may also allow the user to configure some parts of the Integration so that multiple instances of the same integration template can be loaded - for example for a service provider with different customers having a similar resource.
+- Configure index pattern / name
+- Configure custom index fields mapping ([aliasing fields](Integration-fields-mapping.md))
+- Configure datasource (namespace) name (shared by the dashboards, queries, visualizations, alerts)
+- Configure security policies
+
+***Maintenance*** - indicating some components weren't loaded / created as required; the appropriate info will detail the missing parts:
+
+*- Index may not exist*
+*- Dashboard could fail importing (name collision)*
+*- Configuration is broken for some component and needs mending*
+
+Once the issues are corrected it will transition to the ***Ready2Ingest*** stage.
+
+→ ***Ready2Ingest*** - indicating the integration was loaded and it was verified that all the needed indices / dashboards are ready - but no data was found matching the expected classification filters.
+ +→ ***Ready2Read*** - indicating the integration is populating the indices and data can be visualized and queried. + +The system would differentiate from the ***Ready2Ingest*** and ***Ready2Read*** phases using specific queries designed to classify the specific resource data existing in the target index. + + +_**Future Enhancements**_ + +We will be able to add the next phases to the Integration lifecycle - this sub-state can be configured using expected default behaviour and policies. + +- **Ingesting** - meaning the specific resource is continuing to ingest data +- **Stale** - meaning the specific resource has stopped ingesting data + +* * * diff --git a/docs/Integration-plugin-tasks.md b/docs/Integration-plugin-tasks.md new file mode 100644 index 000000000..9c362b803 --- /dev/null +++ b/docs/Integration-plugin-tasks.md @@ -0,0 +1,242 @@ +# Integration Loading Flow + +## Integration plugin start +The integration plugins is currently responsible for the next tasks: + - Catalog registration + - Loading catalog schema templates + - Integration Loading + - Integration state maintenance + +### Catalog Registration + +During the loading of the Integration Plugin it will go over all the [catalog schemas](../../schema/README.md) and creates the appropriate +template mapping for each catalog schema entity. This will allow the future Integration to be validated to the schema catalog they are associated with. + +**For example** - the [Observability](../../schema/observability) catalog will eventually be registered with the next mapping templates + - [Traces](../../schema/observability/traces/traces.mapping) + - [Logs](../../schema/observability/logs/logs.mapping) + - [Metrics](../../schema/observability/metrics/metrics.mapping) + +These mapping specify a backed `data-stream` index pattern they conform with [Naming Pattern](observability/Naming-convention.md). + +**API** +The catalog API can be queries according to the next fields: + - type + - catalog + - category + - version + +`GET _integration/catalog?filter=type:Logs&catalog:Observability&category:web` + + This call will result with the appropriate index_template IDs corresponding to the query: + +```json +{ + "templates": ["sso_logs-template","http-template"], + "catalog":[...], + "category": [...], + "version": [...] +} +``` + +Using the template names one can access the template directly using the `_index_template` URL: + +`GET _index_template/sso_logs-template` + +--- + +### Integrations registry + +During the Integration plugin loading, it will scan the Integration folder (or any resource that functions as the integration repository ) and load each repository [config file](../../schema/system/integration.schema) +into an in memory cache / index allowing to query and filter according to the different integration attributes. + +### External Integrations' registry loading +"External integrations" (ones that are not packaged in the original integrations bundle) can be published by the user. +These "external" integrations are packages as a zip bundle and contain all the relevant resources including: + - `images` + - `assets` + - `documents` + - `icons` + +Once the user has uploaded this zip bundle using the `POST /repository` API, this bundle will be maintained inside the repository index (Blob file or extracted bundle). 
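+
+As a reference, uploading such a bundle against the Prism mock server described in the [API docs](API/README.md) could look like the following curl sketch (the bundle file name is hypothetical):
+```
+curl -X POST "http://localhost:4010/repository" \
+     -H "Content-Type: application/zip" \
+     --data-binary "@nginx-integration-0.1.0.zip"
+```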
+ +In addition to the repository index, the Integration may use a repository cache that will allow the F/E to retrieve additional content residing in the integration folder directly ( images, html pages, URL's ) + +#### Flow Diagram +![flow-diagram-](https://user-images.githubusercontent.com/48943349/222320100-cac40749-9e5a-4e90-8ff2-386958adc06d.png) + +Once the Integration has completed loading, it will allow to query the cache content using the following REST api: + - Filter integration according to its attributes: +``` +GET _integration/repository?filter=type:Logs&category:web,html +``` + - results a list of integrations + +- Query integration by name: +``` +GET _integration/repository/$template_name +``` +- results a single integration + + +### Integrations Loading + +Once the user has selected which integration he want's to load, her will call the next API: +``` +PUT _integration/store/$instance_name +``` +The body of the request will be the integration config file. It is also possible that during the user interaction he would like to update the index naming pattern that the integration instance will use. +It will be reflected in the appropriate section of the integration config json + +_For example the next observability integration:_ +```json + ... + "collection":[ + { + "logs": [{ + "info": "access logs", + "input_type":"logfile", + "dataset":"nginx.access", <<- subject to user changes + "namespace": "prod",<<- subject to user changes + "labels" :["nginx","access"], + "schema": "file:///.../schema/logs/access.json" + }, + ... +``` +### Loading Step + +The integration has the next steps: +```text + - LOADING + - VALIDATION + - UPLOAD + - READY +``` +Each step may result on one of two result + - `ready` - this will transition it to the next step + - `maintenance` - this will hold until user fix issues + + +After the `_integration/store/$instance_name` API was called the next steps will occur: + + - The integration object will be inserted into the `.integration` index with a `LOADING` status + - During this step the integration engine will rename all the assets names according to the user's given name `${instance_name}-assetName.json` + - `${instance_name}-assetName.json`, this can also be extended using more configurable patterns such as `${instance_name}-{dataset}-{namespace}-assetName.json` + - update the index template's `index_pattern` field with the added pattern + - "index_patterns":` ["sso_logs-*-*"]` -> `["sso_logs-*-*", "myLogs-*"]` + - if user selected custom index with proprietary fields - mapping must be called ([field aliasing](Integration-fields-mapping.md)) +--- + - **Success**: If the user changes the data-stream / index naming pattern - this will also be changes in every assets that supports such capability. + - **Fail**: If the validation fails the integration status would be updated to `maintenance` and an appropriate response should reflect the issues. 
+ + + - **Response**: + ```json + { + "instance": "nginx-prod", + "integration-name": "nginx", + "status": "maintenance", + "phase": "LOADING", + "issues": [] + } + ``` + + - Next the integration will undergo a validation phase - marked with a `VALIDATION` status + - assets will be validated with the schema to match fields to the mapping + - assets containing index patterns will be validated any index with these pattern exists + - datasource will be validated to verify connection is accessible + - mapping templates are verified to exist +--- + - **Success**: If the validation succeeds the integration status would be updated + - **Fail**: If the validation fails the integration status would be updated and the next response would return. + + + - **Response**: + ```json + { + "instance": "nginx-prod", + "integration-name": "nginx", + "status": "maintenance", + "phase": "VALIDATION", + "issues": [ + { + "asset": "dashboard", + "name": "nginx-prod-core", + "url": "file:///.../nginx/integration/assets/nginx-prod-core.ndjson", + "issue": [ + "field cloud.version is not present in mapping sso_log-nginx-prod" + ] + } + ] + } + ``` + + + - The assets are being uploaded to the objects store index, if the users cherry picket specific assets to upload they will be loaded as requested. +--- + - **Success**: If the upload succeeds the integration status would be updated and the user will get the success status response + - **Response:** + ```json + { + "instance": "nginx-prod", + "integration-name": "nginx", + "phase": "UPLOAD", + "status": "ready" + } + ``` + + - **Fail**: If the bulk upload fails the integration status would be updated and the next response would return. + - **Response**: + ```json + { + "instance": "nginx-prod", + "integration-name": "nginx", + "status": "maintenance", + "phase": "VALIDATION", + "issues": [ + { + "asset": "dashboard", + "name": "nginx-prod-core", + "url": "file:///.../nginx/integration/assets/nginx-prod-core.ndjson", + "issue": [ + "field cloud.version is not present in mapping sso_log-nginx-prod" + ] + } + ] + } + ``` +--- + +### Additional supported API: + +Status API for Integration `_integration/store/$instance_name/status` will result in : +- **Response:** + ```json + { + "instance": "nginx-prod", + "integration-name": "nginx", + "phase": "UPLOAD", + "status": "ready" + } + ``` + + +Activate / deactivate integration `_integration/store/$instance_name/activate` / `_integration/store/$instance_name/disable` will result in status : +- **Response:** + ```json + { + "instance": "nginx-prod", + "integration-name": "nginx", + "phase": "DISABLE", + "status": "ready" + } + ``` +#### Deactivation + The result of deactivating an integration would cause all the assets to disable. + +#### Activation + The result of activation would depend on the existing status & phase of the Integration + - if not in is ready status - will try to continue the next phases. + - if is ready status - will try to update status to disabled + + diff --git a/docs/Integration-publishing.md b/docs/Integration-publishing.md new file mode 100644 index 000000000..350882667 --- /dev/null +++ b/docs/Integration-publishing.md @@ -0,0 +1,34 @@ +# Integration Publishing + +Once an integration is created and tested, it should be signed and uploaded into a shared public dedicated repository [The location / owners of this repository should be discussed ] is should be discussed . 
+Each published Integration artifact must attach the following (which will be validated during the upload):
+
+**Metadata**
+- Owner
+- License
+- Repository
+- Documentation
+- Versions
+- All relevant versions the testing phase was created with
+
+**Samples**
+- Sample relevant signals data for the integration to be used as exemplar
+- *OPTIONAL*: docker compose file including
+- The agent generating data / mock data generator
+- The Integration artifact part for translating the original format
+- OpenSearch Observability relevant version to write the data into
+
+
+#### **OpenSearch Integration Verification Review Process**
+
+Once an integration is published, it goes into an OpenSearch Integration review process.
+Once an integration is reviewed and validated - it will be published in OpenSearch's recommended Integrations list and can be assembled into the complete Observability Solution.
+
+The verification process includes running the docker sample and verifying that all the display components function as expected.
+
+***In the future*** OpenSearch can automate this process by requiring a dedicated API or baseline queries and functionality to work on the Integration, thus automating this validation phase completely.
+
+An integration can also be published to the public repository without the review process. Integrations not passing this process would not be bundled in the Observability release or be linked and recommended by OpenSearch. Nevertheless, they can still be manually installed in an Observability cluster, and the installing party is responsible for making sure they operate properly.
+
+* * *
+
diff --git a/docs/Integration-structure.md b/docs/Integration-structure.md
new file mode 100644
index 000000000..d00a6245b
--- /dev/null
+++ b/docs/Integration-structure.md
@@ -0,0 +1,226 @@
+# Integration structure
+
+Integrations are an encapsulated collection of assets and as such have a specific structure.
+This document presents the Integration structure and conventions, and shares an example of an NginX resource integration.
+
+**_Metadata_**
+
+ * Integration (data producer) resource
+ * Indices (mapping & naming)
+ * Transformation mapping schema
+ * Optional test harnesses repository
+ * Verified version and documentation
+ * Catalog, Category & classification (e.g. Observability, logs/traces/alerts/metrics, http)
+
+**_Display components_**
+
+ * Dashboards
+ * Maps
+ * Applications
+ * Notebooks
+ * Operations Panels
+ * Saved Queries: [PPL](https://opensearch.org/docs/2.4/search-plugins/sql/ppl/index/)/[SQL](https://opensearch.org/docs/2.4/search-plugins/sql/sql/index/)/[DQL](https://opensearch.org/docs/2.4/dashboards/discover/dql/)
+ * Alerts
+ * Additional Assets
+
+**_Additional Assets may include_**
+ * [Datasource configuration](https://opensearch.org/docs/2.4/dashboards/discover/multi-data-sources/)
+ * Materialized View Table Creation
+ * S3 schema/table definitions
+
+The notion that structured data makes an enormous contribution to the understanding of system behaviour is the key principle behind the Integration model.
+Once input content has form and shape - it will be used to calculate and correlate different pieces of data.
+ +### Config File +This file contains the Observability resource configuration details such as + - Observability Produced signal types + - Observability Produced signal category (if applicable) + - Description and resource identification + +### Display Folder +A folder in which the actual visualization components are stored, containing all dashboard resources including + - Application + - Notebook + - Dashboard + - Visualizations + - Maps + +### Queries Folder +A folder containing DKL, SQL, PPL queries on the Observability default or custom indices. + +### Schema +A folder containing the specific fields which this resource is directly populating. +This folder may also contain the transformations mapping between the original observed signal format and the Observability schema. + +### Samples +This folder contains a list of samples signals in the correct schema structure that is to be ingested into Observability indices +Possible original observed signal format signals before they were transformed into the Observability schema. + +## Info +This folder contains all the additional information about the resource producing the Observability signals, additional data about the dashboards and visual components + - documentation describing the resource + - screenshots describing the visual parts + - integration installation process specific details + - additional dependencies and licensing + - repository info this integration is originated from + - metadata info contains additional security and policies definitions + + +--- + +### NginX Integration Sample + +Let's examine the next NginX integration component: + +```yaml +nginX + config.json + assets + display` + ApplicationA.json + Maps.json + DashboardB.json + Alerts.json + queries + QueryA.json + schemas + transformation.json + samples + nginx.access logs + nginx.error logs + nginx.stats metrics + transformed + access-logs.json + error-logs.json + stats.logs + info + documentation + metadata + +``` + +**Definitions** + +- `config.json` defines the general configuration for the entire integration component. +- `display` this is the folder in which the actual visualization components are stored +- `queries` this is the folder in which the actual PPL queries are stored +- `schemas` this is the folder in which the schemas are stored - schema for mapping translations or index mapping. 
+- `samples` is the folder containing sample logs and their translated counterparts
+- `info` is the folder containing documentation, licenses and external references
+
+
+The `config.json` file includes the following Integration configuration:
+
+```
+{
+  "template_name": "nginx",
+  "version": {
+    "integ": "0.1.0",
+    "schema": "1.0.0",
+    "resource": "^1.23.0"
+  },
+  "description": "Nginx HTTP server collector",
+  "identification": "instrumentationScope.attributes.identification",
+  "catalog": "observability",
+  "components": [
+    "web","http"
+  ],
+  "collection":[
+    {
+      "logs": [{
+        "info": "access logs",
+        "input_type":"logfile",
+        "dataset":"nginx.access",
+        "labels" :["nginx","access"]
+      },
+      {
+        "info": "error logs",
+        "input_type":"logfile",
+        "labels" :["nginx","error"],
+        "dataset":"nginx.error"
+      }]
+    },
+    {
+      "metrics": [{
+        "info": "status metrics",
+        "input_type":"metrics",
+        "dataset":"nginx.status",
+        "labels" :["nginx","status"]
+      }]
+    }
+  ],
+  "repo": {
+    "github": "https://github.com/opensearch-project/observability/tree/main/integrations/nginx"
+  }
+}
+```
+
+**Definitions:**
+
+```
+  "version": {
+    "integ": "0.1.0",
+    "schema": "1.0.0",
+    "resource": "^1.23.0"
+  }
+```
+
+
+_*`version:`*_
+This references the following semantic versioning:
+- `integ` version indicates the version of this specific Integration
+- `schema` version indicates the Observability schema version
+- `resource` version indicates the actual resource version which is being integrated.
+
+_*`identification:`*_
+This references the field this integration uses to explicitly identify the resource the signal is generated from.
+
+In this case the field resides in the `instrumentationScope.attributes.identification` path and should have a value that corresponds to the name of the integration.
+
+
+```
+"identification": "instrumentationScope.attributes.identification",
+```
+
+`catalog:`
+This defines the catalog with which the Integration is associated. This is based on the `catalog` API which is part of the integration support. [Sample Observability catalog](../../schema/system/samples/catalog.json)
+
+`components:`
+This section defines the classification components associated with this Integration according to the [ECS specification](https://www.elastic.co/guide/en/ecs/current/ecs-allowed-values-event-category.html) and expressed in the [Sample Observability catalog](../../schema/system/samples/catalog.json)
+
+`collection:`
+This references the different types of collection this integration is offering. It can be one of the following Observability catalog elements:
+{ *`Traces, Logs, Metrics`* }.
+
+The collection **name** (`logs`,`traces`,`metrics`) reflects the catalog's `category` as it appears in the [Sample Observability catalog](../../schema/system/samples/catalog.json)
+
+**Collections**
+
+Let's dive into a specific log collection:
+
+```
+  "logs": [{
+    "info": "access logs",
+    "input_type":"logfile",
+    "dataset":"nginx.access",
+    "labels" :["nginx","access"]
+  },
+```
+
+This log collection covers nginx access logs, as described in the `info` section.
+The `input_type` is a categorical classification of the log kind, which is specified in the ECS specification as well.
+
+- `dataset` is defined above and indicates the target routing index, in this example `sso_logs-nginx.access-${namespace}`
+- `labels` are general purpose labeling tags that allow further correlation and associations.
+- `schema` optional parameter - the location of the mapping configuration between the original log format and the Observability Log format.
+* * *
+
+#### Display
+
+The display folder contains the relevant visual components associated with this integration.
+
+The visual display components will need to be validated against the schema they are expected to work on - this may be part of the Integration validation flow...
+
+#### Queries
+
+Queries contains specific PPL queries that precisely demonstrate common and useful use-cases.
diff --git a/docs/Integration-verification.md b/docs/Integration-verification.md
new file mode 100644
index 000000000..f91490c9b
--- /dev/null
+++ b/docs/Integration-verification.md
@@ -0,0 +1,59 @@
+# Integration Verification
+
+After an Integration is developed, it has to be tested and validated prior to publication to a shared repo.
+Validation of an Integration is expected to be a build-time phase that verifies the following:
+
+- **Structure Validation**:
+
+* make sure the `config.json` is complete and contains all the mandatory parts.
+* make sure all the versions correctly reflect the schema files
+
+- **Schema Validation**:
+
+* make sure all the input_types defined in the `collections` elements have a compatible transformation schema and this schema complies with the SSO versioned schema.
+* make sure all the transformations conform to the SSO versioned schema.
+
+- **Display Validation**: make sure all the display components have a valid json structure, and if they explicitly reference fields - these fields must be aligned with the SSO schema type (Trace/Metrics/Logs...)
+
+- **Query Validation**: make sure all the queries have a valid PPL structure, and if they explicitly reference fields - these fields must be aligned with the SSO schema type (Trace/Metrics/Logs...)
+
+- **Assets Validation**: make sure all the assets are valid
+
+  - Datasource configuration assets validation - verify the datasource is available and alive
+  - Materialized view configuration assets validation - verify the materialized view is accessible
+
+***_End to End_***
+- **Sample Validation:**
+
+* make sure the sample outcome of the transformation is compatible with the SSO schema
+* make sure the outcome result shares all the transformable information from the input source sample
+
+All these validations would use a dedicated validation & testing library supplied by the SimpleSchema plugin.
+* * *
+
+## Integration Development Test-Harness
+
+In order to simplify and automate the process of validating that an Integration is compliant with OpenSearch Observability - we suggest the following test harness. The test harness is essentially an end-to-end standard that requires the developer to set up and provide the following components:
+
+- Docker compose with the following:
+
+  * Component (Agent / Exporter) responsible for transforming the source format to the Observability SSO format.
+  * Pipeline that will be used to push Observability signals into the OpenSearch Index
+  * OpenSearch with the Observability plugin
+  * Ready-made samples of the original signals that will be used by the transformation component to produce the Observability documents.
+  * Assembly of Integration components (Dashboards, queries, Visualizations) that will be loaded into the Observability Plugin.
+
+The test flow will have the following steps:
+
+**Initiating the Integration Pipeline**
+- Spin-up the docker compose elements.
+- Init the Observability including creation of the Ingestion index and loading of the Integration components +- Start the ingestion pipeline which will take the sample original signals, transform them to Observability and submit to OpenSearch Observability Index. + +Next step would be to run a series of **baseline queries** that should be part of the verification to prove correctness, the queries must match the existing sample data including time and measurements . + +**Result** of these queries (including UX driven queries) are also compared with the expected results and verified for correctness . + +This completes the test verification process and verifies the Integration is compliant with the Observability schema and visual components . + +* * * diff --git a/docs/Integrations.md b/docs/Integrations.md new file mode 100644 index 000000000..87a8bf101 --- /dev/null +++ b/docs/Integrations.md @@ -0,0 +1,151 @@ +# Integrations + +## Content + +_**Highlights**_ +- [Introduction](#introduction) +- [Background](#background) +- [Problem definition](#problem-definition) +- [Proposal](#Proposal) +--- +## Introduction +Integration is a new type of logical component that allows high level composition of multiple Dashboards / Applications / Queries and more. +Integrations can be used to bring together all the metrics and logs from the infrastructure and gain insight into the unified system as a whole. + +Some products address integrations as consisting of the next parts +- data on-boarding +- data cleaning / parsing / transformation +- dashboard configuration / creation. + +This RFC will only address the last part that includes dashboard. Introducing this concept will allow OpenSearch dashboards to be used in a much broader way using pre-canned components (such as display elements and queries). + +Dashboard users which are interested on understanding and analyzing their infrastructure components will be able to search for these components in our integration repository and add them to their system. +Such integration can include infrastructure components such as AWS's EKS,ELB, ECS and many more... + +Once integrated, bundled dashboards and queries can deliver a higher and dedicated observability and accessibility into the system for better understanding and monitoring. + +Integration is **tightly coupled with a schema** that represents the data this Integration is representing, in the Observability use case the schema relates to Traces, Logs, Metrics and Alerts. +Integration for security related dashboards and data would concern with types and relationships that address that domain. + +## Background +Observability is the ability to measure a system’s current state based on the data it generates, such as logs, metrics, and traces. Observability relies on telemetry derived from instrumentation that comes from the endpoints and services. + +Observability telemetry signals (logs, metrics, traces, alerts) arriving from the system would contain all the necessary information needed to observe and monitor. + +Modern application can have a complicated distributed architecture that combines cloud native and microservices layers. Each layer produces telemetry signals that may have different structure and information. + +Using Observability telemetry schema we can organize, correlate and investigate system behavior in a standard and well-defined manner. + +Observability telemetry schema defines the following components - **logs**, **traces**, **metrics** and **alerts**. 
+ +**Logs** provide comprehensive system details, such as a fault and the specific time when the fault occurred. By analyzing the logs, one can troubleshoot code and identify where and why the error occurred. + +**Traces** represent the entire journey of a request or action as it moves through all the layers of a distributed system. Traces allow you to profile and observe systems, especially containerized applications, serverless architectures, or microservices architecture. + +**Metrics** provide a numerical representation of data that can be used to determine a service or component’s overall behaviour over time. + +In many occasions, correlate between the logs, traces and metrics is mandatory to be able to monitor and understand how the system is behaving. In addition, the distributed nature of the application produces multiple formats of telemetry signals arriving from different components ( network router, web server, database) + +For such correlation to be possible the industry has formulated several protocols ([OTEL](https://github.com/open-telemetry), [ECS](https://github.com/elastic/ecs), [OpenMetrics](https://github.com/OpenObservability/OpenMetrics), [Alerts](https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/azure-monitor/alerts/alerts-common-schema.md)) for communicating these signals - the **Observability schemas**. + +## Problem definition +Today in OpenSearch, Observability and its dashboards are only partially aware (traces only) of the schematic structure of these signal types. In addition, the actual schema mapping is not present internally in the Observability plugin and has to be imported externally. + +Integrating different data producers and correlating different signals is practically unavailable most of the time due to missing schema definition and correlated field names and has to be done manually by every customer in every system. + +Integration of a new Observability data source (such as NGINX / Tomcat) includes complicated configuration of both the ingestion process and the actual index store, manually discovery of the specific format of the new datasource and the crafting of the dedicated dashboards for its proprietary fields. + +## Proposal +Our goal is creating a consolidated Observability solution. It will allows customers to ingest any type of supported telemetry data from many types of providers and be able to display and analyze the data in a common and unified way. + +Customers using Observability are expecting our solution to allow simple and out of the box integration and configuration. + +Using a unified schema that models all the Observability components and allowing customers to add integrations would simplify the daily monitoring and incidents investigations process (by using pre-canned dashboards and pre-defined correlation and alerts). + +As an example for the importance of a common schema : + +In a multi-layered application which produces multiple log and trace signals from different software and Network components - we need to address these signals using a common vocabulary. Such a vocabulary would simplify correlating information using common fields such as “`process.args`”, “`host.domain`”, “`observer.os`” + +--- + +## Integrating Component Structure + +The following section details the structure and composition of an integration component and how it may be utilized for the Observability use-cases. 
+ +#### Structure +As mentioned above, integration is a collection of elements that formulate how to observe a specific data emitting resource - in our case a telemetry data producer. + +A typical Observability Integration consists of the following parts: + +***Metadata*** + + * Observability data producer resource + * Supplement Indices (mapping & naming) + * Collection Agent Version + * Transformation schema + * Optional test harnesses repository + * Verified version and documentation + * Category & classification (logs/traces/alerts/metrics) + +***Display components*** + + * Dashboards + * Maps + * Applications + * Notebooks + * Operations Panels + * Saved PPL/SQL/DQL Queries + * Alerts + +A major factor in this project is that structured data has an enormous contribution to the understanding of the system behaviour. +Once input content has form and shape - it will be used to calculate and correlate different pieces of data. + +The next parts of this document will present **Integrations For Observability** which has a key concept of Observability schema. + +It will overview the concepts of observability, will describe the current issues customers are facing with observability and continue to elaborate on how to mitigate them using Integrations and structured schemas. + +## Integration usage workflows + +The following workflows describes the end-to-end flows from the ingestion step to the discovery and analysis phase including the building and preparation of an Integration and publishing it with the community . + +1) **Creating An Integration.** +2) **Testing/Validating An Integration.** +3) **Publishing An Integration.** +4) **Loading An Integration Into Observability.** + +These flows will be described in the following documentation in this folder. + +* * * + +## References: + +### Observability Physical mapping + +As part of the Observability Integration, Observability will publish a schema that is conformed by & data-prepare & fluent-d plugins / libraries . 
+ +Additional information attached: + +[Nginx module for Fluent-bit ECS](https://gist.github.com/agup006/7848e339f111cdaafdd0f3fdf7ee2d32) + +* **Traces** + * https://github.com/opensearch-project/data-prepper/tree/main/data-prepper-plugins/otel-trace-source + * https://github.com/open-telemetry/opentelemetry-proto/blob/v0.9.0/opentelemetry/proto/trace/v1/trace.proto + * https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/trace/semantic_conventions + * https://github.com/open-telemetry/opentelemetry-java/tree/0a9794ad415c87c162f518a9112a9b7849564bee/sdk/trace + * https://github.com/opensearch-project/observability/pull/1395 +* **Metrics** + * https://github.com/opensearch-project/data-prepper/tree/main/data-prepper-plugins/otel-metrics-source + * https://github.com/open-telemetry/opentelemetry-proto/blob/v0.9.0/opentelemetry/proto/metrics/v1/metrics.proto + * https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/metrics/semantic_conventions + * https://github.com/open-telemetry/opentelemetry-java/tree/0a9794ad415c87c162f518a9112a9b7849564bee/sdk/metrics/src/main/java/io/opentelemetry/sdk/metrics + * https://github.com/opensearch-project/observability/pull/1397 +* **Logs** + * based on OTEL / ECS logs formats [OTEL](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md) / [ECS](https://github.com/elastic/ecs/blob/main/generated/ecs/ecs_nested.yml) + * https://github.com/open-telemetry/opentelemetry-proto/blob/v0.9.0/opentelemetry/proto/logs/v1/logs.proto + * https://github.com/open-telemetry/opentelemetry-specification/tree/main/specification/logs/semantic_conventions + * https://github.com/open-telemetry/opentelemetry-java/tree/0a9794ad415c87c162f518a9112a9b7849564bee/sdk/logs + * https://github.com/opensearch-project/observability/pull/1403 + + +* * * + diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..1879e47fe --- /dev/null +++ b/docs/README.md @@ -0,0 +1,75 @@ +## Integrations Documentation Table of Content + +The integration repository contains the list of the supported catalogs ([Observability](../schema/observability/README.md) for example) + +In addition, it also contains the list of [Integrations](../integrations/README.md) such as [Nginx](../integrations/nginx/info/README.md) + +The next documents describe the user workflows, design and architecture of the project. + +--- + +- [Integration introduction](Integrations.md) + +Integration is a new type of logical component that allows high level composition of multiple Dashboards / Applications / Queries and more. +Integrations can be used to bring together all the metrics and logs from the infrastructure and gain insight into the unified system as a whole. + +--- + +- [Simple-Schema](Simple-schema.md) + +Simple Schema bring the importance of a coherent monitoring solution for modern infrastructure, and the need for a normalized schema to manage the vast amounts of log formats and types. +It presents a standard structure for Observability (based on [OTEL](https://github.com/open-telemetry) & [ECS](https://github.com/elastic/ecs)) consisting of three main signal types (logs, traces, and metrics) and supplement indices. + + +--- + +- [Integration Structure](Integration-structure.md) + + +Integrations are collections of assets with specific structure and conventions. 
The metadata and display components of integrations are described, along with additional assets such as datasource configuration and materialized view table creation. +The example of an NginX integration component is also presented, which includes a config file, display folder, queries folder, schema folder, samples folder, and info folder. Integration configuration includs versioning, identification, catalog, components, and collection. + +--- + +- [Integration Fields Mapping](Integration-fields-mapping.md) + +Fields mapping is a solution for connecting similar purpose fields with different names using the Field Alias feature. This allows queries and dashboards to work seamlessly without any changes to the data or index information. The proposed workflow involves users selecting an existing index for the data stream and mapping the original field to the schema destination field for the aliasing flow to be performed. + +--- + +- [Integration Plugin](Integration-plugin-tasks.md) + +The Integration Loading Flow is responsible for registering catalogs, loading integrations, and maintaining integration state. During catalog registration, the Integration Plugin creates a mapping template for each catalog schema entity. Integrations can be loaded from an in-memory cache or external zip bundle uploaded by the user. The loading process involves validation, asset upload, saved objects insertion, and mapping templates verification. The API provides additional features such as status updates, activation, and deactivation. + +--- + +- [Integration-Verification](Integration-verification.md) + +The Integration Verification process involves validating the integration's structure, schema, display, query, and assets. A dedicated validation and testing library supplied by SimpleSchema plugin is used for these validations. +An Integration Development Test-Harness is suggested to simplify and automate the validation process, which includes setting up a docker compose environment, running baseline queries, and comparing the results with expected results to verify correctness. + +--- + +- [Integration-Publishing](Integration-publishing.md) + + +The Integration Publishing process involves signing and uploading the integration to a public repository, along with metadata such as owner, license, and documentation, as well as relevant versions and sample data. The uploaded integration goes through a review process that includes running the docker sample and verifying display components work correctly. +Integrations that do not pass this process will not be bundled in the Observability release or recommended by OpenSearch, but can still be manually installed with the installing party responsible for ensuring proper operation. In the future, OpenSearch may automate this process with a dedicated API or baseline queries. + +--- + +- [Integration-Loading](Integration-loading.md) + + +This describes the process of loading integrations into OpenSearch for use in Observability. After an integration passes the OpenSearch Integration Verification Review Process, it is packaged and bundled with the Observability solution. +The integration goes through several stages in its lifecycle: `Loading`, `Maintenance`, `Ready to Ingest`, and `Ready to Read`. +During the Loading phase, the integration's assets are being loaded and configured. `Maintenance` indicates that some components may be missing or broken, and the appropriate info will be detailed on the missing parts. 
+Once issues are corrected, it will transition to the `Ready to Ingest` stage, indicating the integration is loaded and verified.
+
+---
+
+- [Integration-API](Integration-API.md)
+
+This document outlines the API and workflow for loading integrations into OpenSearch. Integrations are stateful bundles that are stored in the system `.integration` index and reflect the integration's status during the different phases of its lifecycle.
+ - The first API discussed is the Load Integrations Repository API. Once the Integration plugin starts, it loads all available integrations that are bundled in the integration repo. The backend scans the Integration folder and loads each integration config file into a cache, allowing for filtering on the registry API.
+ - The Load Integration API specifies which integration to load into the system; the backend initiates the loading process and displays the appropriate status to reflect the loading steps.
\ No newline at end of file
diff --git a/docs/Simple-schema.md b/docs/Simple-schema.md
new file mode 100644
index 000000000..fec47463e
--- /dev/null
+++ b/docs/Simple-schema.md
@@ -0,0 +1,146 @@
+# Simple Schema Support
+In light of the increased complexity of modern infrastructure and the need for better monitoring everywhere in the stack, the requirement for a collective and coherent monitoring solution is profound. We provide a schema normalized from the most popular schema systems, accompanied by an SDK and a multi-language codegen tool powered by GraphQL. Such a solution enables:
+
+* Understanding complex systems
+* Advanced planning of infrastructure & application capacities.
+* Fast problem resolution
+* Clear and insightful incident reviews
+* Reliability in both uptime and performance
+
+## Key components for such a system
+
+### Transparency
+
+An observability platform requires a comprehensive and high-level view of application performance. It needs to be able to drill down into specific minute details with full context, while also providing a consistent and transparent path for moving between high-level and lower-level views.
+
+### Multi disciplinary
+
+Modern software architecture is comprised of dozens of moving parts, from browsers and mobile devices, through cloud components and lambda functions, down to the database and data lake. All these need to be viewed, understood and analyzed as a complete system.
+
+### Domain Specific
+
+Customers are interested in determining their own specific KPI’s as quickly and accurately as possible.
+Customers will focus on measuring application performance and on surfacing application-performance blockers and escalations.
+
+### Schema To the Rescue
+
+When considering the sheer amount of log formats and types - from Linux systemd logs to CloudWatch, from proprietary application logging to RDBMS logs - everything is relevant and everything is important.
+Missing something may disrupt the understanding of the system and deny the ability to analyze its weaknesses.
+
+Many log reporters and log formats were created during the many years that today's infrastructure has existed - their structure differs and their semantic nature is not aligned with contemporary jargon.
+
+This is often true even for relatively modern software components; for example, network logs arriving from a network firewall or a domain controller may use different names and semantics for the same underlying concepts.
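+To make this concrete, below is a minimal illustrative sketch - the source field names and the ECS-style normalized names are hypothetical examples, not taken from any specific product - showing how two producers can describe the same event differently and how a simple mapping step aligns them:
+
+```python
+# Illustrative sketch only: hypothetical source field names mapped onto
+# ECS-style normalized names, purely for demonstration.
+
+# Two producers describing the same underlying concepts with different names.
+firewall_event = {"src_ip": "10.0.0.5", "dst_port": 443, "act": "allow"}
+domain_controller_event = {"ClientAddress": "10.0.0.5", "Port": 443, "Outcome": "success"}
+
+# Per-source mapping from the proprietary field name to the normalized name.
+FIELD_MAPPINGS = {
+    "firewall": {"src_ip": "source.ip", "dst_port": "destination.port", "act": "event.outcome"},
+    "domain_controller": {"ClientAddress": "source.ip", "Port": "destination.port", "Outcome": "event.outcome"},
+}
+
+def normalize(source: str, event: dict) -> dict:
+    """Rename a producer's proprietary fields to their normalized equivalents."""
+    mapping = FIELD_MAPPINGS[source]
+    return {mapping[key]: value for key, value in event.items() if key in mapping}
+
+# Both events now expose the same "source.ip" field and can be correlated by one query.
+print(normalize("firewall", firewall_event))
+print(normalize("domain_controller", domain_controller_event))
+```
+
+Once both producers expose the same normalized field names, a single query over `source.ip` correlates the two signals regardless of their origin.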
+ +### Normalization +In a similar manner of which the relational database is normalizing data structure we also have to normalize the logs data so that similar semantic concepts will appear the same no matter their origin. + +### Mapping & Patterns + +Using matching patterns and semantic concepts evolved in the open source community +- open telemetry +- elastic common schema +- cloud events +- open metrics + +We will demonstrate the power of the normalization of the logs and events arriving from any source to create a common understanding of the world. We will explore the way we can utilize these schemas to investigate and correlate observations into knowledge and understanding. + +## Schema support for Observability + +Simple schema for Observability is defined by the three main structured types OpenTelemetry & ECS define and supports which are **Logs, Traces, Metric**. +OpenSearch's Observability Plugin will support these schema structures out of the box in the form of a default index pattern per type (will be detailed below). + +**Supplement schema** +Any additional index that can be added by customer or a 3rd party integration component will be categorized as supplement index. Supplement indices often present enriched Observability information that has a schema. +These supplement indices may be used by “**Schema-Aware**” visualization component or queries. + +## Schema Aware Components +The role of the Observability plugin is intended to allow maximum flexibility and not imposing a strict Index structure of the data source. Nevertheless, the modern nature of distributed application and the vast amount of telemetry producers is changing this perception. + +Today most of the Observability solutions (splunk, datadog, dynatrace) recommend using a consolidated schema to represent the entire variance of log/trace/metrics producers. + +This allows monitoring, incidents investigation and corrections process to become simpler, maintainable and reproducible. + + +A **Schema-Aware visualization** component is a component which assumes the existence of specific index/indices and expects these indices to have a specific structure - schema. + +As an example we can see that Trace-Analytics is schema-aware since it directly assumes the traces & serviceMap indices exist and expects them to follow a specific schema. + +This definition doesn’t change the existing status of visualization components which are **not** “Schema Aware” but it only regulates which Visual components would benefit using a schema and which will be agnostic of its content. + +**Operation Panel** for example, are not “schema aware” since they don’t assume in advanced the existence of a specific index nor do they expect the index they display to have a specific structure. + +**Schema aware visualizations** such as Applications, Metrics, Alerts and Integrations will not be able to work directly with a non-standard proprietary index unless being explicitly mapped during the query execution - this **schema-on-read** feature will be discussed later + +## Data Model + +Observability data indices themselves have a data model which they support and comply with (Traces, Logs, Metrics & Alerts), this data model is versioned to allow future evolution. + +OpenSearch is aware of the existing leading Observability formats (OTEL / ECS) and should help customers use either one of the formats in the Observability Plugin. 
+
+Observability needs to allow ingestion of both formats and internally consolidate them to the best of its capabilities in order to present a unified Observability platform.
+
+The data model is highly coupled with the visual components; for example, the Application visual component & Trace analytics are directly coupled with all the Observability schemas (Logs, Traces, Spans) and possibly with some Supplement schema (ServiceMap by the data-prepper ingestion pipeline).
+
+## Ingestion Pipeline
+
+A mandatory part of an Observability solution is its ability to ingest data at scale. Currently, OpenSearch Observability supports the following out-of-the-box schematized data providers:
+- Data prepper - https://github.com/opensearch-project/data-prepper
+- Jaeger - https://opensearch.org/docs/latest/observing-your-data/trace/trace-analytics-jaeger/
+
+### Data Prepper
+
+**Indices:**
+
+- **Traces data**: `otel-v1-apm-span-**` *(Data Prepper Observability Trace mapping)*
+- **Logs data**: N/A
+- **Metrics data**: N/A
+- **Alerts**: N/A
+- **Supplement**: `otel-v1-apm-service-map` *(Proprietary Index Mapping)*
+
+**Dashboards:**
+- *Application Analytics*
+- *Trace Analytics*
+
+---
+
+### Jaeger
+
+**Indices:**
+
+- **Traces data**: `jaeger-span*` *(Jaeger Observability Trace mapping)*
+- **Logs data**: N/A
+- **Metrics data**: N/A
+- **Alerts**: N/A
+- **Supplement**: N/A
+
+
+**Dashboards:**
+- *Application Analytics* *(without services)*
+- *Trace Analytics* *(without services)*
+* * *
+
+## Observability Indices
+
+As stated above, the Observability default indices for collecting the main 4 telemetry types are:
+- logs
+- traces
+- metrics
+- alerts
+
+## Schema driven Dashboards
+
+OpenSearch's goal has always been to simplify and allow collaborative capabilities for the Observability plugin.
+
+The new Integration component is responsible for allowing seamless integration of new Observability data provider dashboards. This includes well-structured indices, easy configuration and a common convention for ingesting multiple datasources.
+ +Integration is an encapsulated artifact that contains the following parts (as described above) +- resource metadata +- associated visual components + +The next workflow explains how the process of activating a new Integration is happening: diff --git a/docs/img/Integration-assets-status.png b/docs/img/Integration-assets-status.png new file mode 100644 index 000000000..a282fbf6f Binary files /dev/null and b/docs/img/Integration-assets-status.png differ diff --git a/docs/img/data-prepper.png b/docs/img/data-prepper.png new file mode 100644 index 000000000..371392715 Binary files /dev/null and b/docs/img/data-prepper.png differ diff --git a/docs/img/integration-architecture.png b/docs/img/integration-architecture.png new file mode 100644 index 000000000..aa929069e Binary files /dev/null and b/docs/img/integration-architecture.png differ diff --git a/docs/img/integration-component-layout.png b/docs/img/integration-component-layout.png new file mode 100644 index 000000000..0606acc84 Binary files /dev/null and b/docs/img/integration-component-layout.png differ diff --git a/docs/img/integration-loading-lifecycle.png b/docs/img/integration-loading-lifecycle.png new file mode 100644 index 000000000..c8ef1523d Binary files /dev/null and b/docs/img/integration-loading-lifecycle.png differ diff --git a/docs/img/maintaining-issue.png b/docs/img/maintaining-issue.png new file mode 100644 index 000000000..8967071e9 Binary files /dev/null and b/docs/img/maintaining-issue.png differ diff --git a/docs/observability/Naming-convention.md b/docs/observability/Naming-convention.md new file mode 100644 index 000000000..402d13974 --- /dev/null +++ b/docs/observability/Naming-convention.md @@ -0,0 +1,121 @@ +# Naming Convention +This document will describe the index naming standard for ingestion of Observability signals - Traces, Metrics, Logs. +Currently, there is no single coherent pattern to use for all Observability signals and potential data sources. + +For example - `data-prepper` use their own index naming and structure to ingest Observability signals. + +`data-prepper Indices:` + +- Traces data: `otel-v1-apm-span-**` *(Observability Trace mapping)* +- Supplement: `otel-v1-apm-service-map` *(Proprietary Index Mapping)* + +The same goes for jaeger trace data type: +- Traces data: `jaeger-span*` *(Observability Trace mapping)* + +This convention is also harder to manage regarding the index revolving for lifecycle management - this would be optimized using the `data_stream` layer supported by OpenSearch API. + +Today due to different index structure and non-standard naming patterns we cant create crosscutting queries that will correlate or aggregate information on top of different Observability data providers. 
+
+## Proposal
+
+We propose the following structure and naming patterns, based on these conventions:
+1) Add `data_stream` support for all Observability based standard indices
+2) Use a standard index naming convention for Observability signals
+3) Give customers a namespace naming degree of freedom to allow arbitrary names for specific customer use-cases
+4) Move the Observability Indices Template & default index creation into the Observability Plugin bootstrap
+
+---
+1) Using a `data_stream` encourages simple physical index management and querying - each Observability index would actually be a data_stream:
+
+```
+A typical workflow to manage time-series data involves multiple steps, such as creating a rollover index alias, defining a write index, and defining common mappings and settings for the backing indices.
+
+Data streams simplify this process and enforce a setup that best suits time-series data, such as being designed primarily for append-only data and ensuring that each document has a timestamp field.
+
+A data stream is internally composed of multiple backing indices. Search requests are routed to all the backing indices, while indexing requests are routed to the latest write index.
+```
+
+2) Consolidate data using the `data_stream` concepts, patterns and catalog. The following Observability index pattern will be used:
+
+The index pattern follows the naming structure `{type}`-`{dataset}`-`{namespace}`
+
+- **type** - indicates the Observability high-level type: "logs", "metrics" or "traces" (prefixed by the `sso_` schema convention)
+- **dataset** - can contain anything that classifies the source of the data - such as `nginx.access` (if none is specified, "**default**" will be used).
+- **namespace** - a user defined namespace, mainly useful to allow grouping of data such as production grade or geographic classification
+
+3) The ***sso_{type}-{dataset}-{namespace}*** pattern addresses the ability to route similarly structured information to different indices according to the customer's strategy.
+
+This strategy is defined by the two degrees of naming freedom: `dataset` and `namespace`.
+
+For example, a customer may want to route the nginx logs from two geographical areas into two different indices:
+- `sso_logs-nginx-us`
+- `sso_logs-nginx-eu`
+
+This type of distinction also allows for the creation of crosscutting queries by using the **index query pattern** `sso_logs-nginx-*`, or a geography based crosscutting query `sso_logs-*-eu`.
+
+
+## Data index routing
+The [ingestion component](https://github.com/opensearch-project/data-prepper) which is responsible for ingesting the Observability signals should route the data into the relevant indices.
+The `sso_{type}-{dataset}-{namespace}` combination dictates the target index; `{type}` is prefixed with `sso_` and resolves to one of the supported types:
+
+- Traces - `sso_traces`
+- Metrics - `sso_metrics`
+- Logs - `sso_logs`
+
+For example, if the ingested signal contains the following section:
+```json
+{
+  ...
+  "attributes": {
+    "data_stream": {
+      "type": "span",
+      "dataset": "mysql",
+      "namespace": "prod"
+    }
+  }
+}
+```
+This indicates that the target index for this Observability signal should be the `sso_traces-mysql-prod` index, which follows the traces schema mapping.
+
+If the `data_stream` information is not present inside the signal, the default index should be used.
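+The routing rule above can be summarized in a few lines of pseudocode. The following is a minimal sketch under stated assumptions - it is not the actual data-prepper implementation, and the fallback index name used when no `data_stream` information is present is a hypothetical placeholder:
+
+```python
+# Minimal sketch of the sso_{type}-{dataset}-{namespace} routing rule.
+# Hypothetical helper for illustration; not part of data-prepper.
+
+# Map the data_stream "type" value onto the supported sso_ prefixes.
+TYPE_TO_INDEX = {
+    "span": "sso_traces",
+    "traces": "sso_traces",
+    "logs": "sso_logs",
+    "metrics": "sso_metrics",
+}
+
+def target_index(signal: dict) -> str:
+    """Derive the target index name from a signal's data_stream attributes."""
+    data_stream = signal.get("attributes", {}).get("data_stream")
+    if not data_stream:
+        # Assumption: fall back to a default logs index when no routing info is present.
+        return "sso_logs-default-default"
+    sso_type = TYPE_TO_INDEX.get(data_stream.get("type"), "sso_logs")
+    dataset = data_stream.get("dataset", "default")
+    namespace = data_stream.get("namespace", "default")
+    return f"{sso_type}-{dataset}-{namespace}"
+
+# The span example above resolves to "sso_traces-mysql-prod".
+example = {"attributes": {"data_stream": {"type": "span", "dataset": "mysql", "namespace": "prod"}}}
+assert target_index(example) == "sso_traces-mysql-prod"
+```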
+ + +--- + +## Observability Index templates +With the expectation of multiple Observability data providers and the need to consolidate all to a single common schema - the Observability plugin will take the following responsibilities : + +- Define and create all the signals index templates upon loading +- Create default data_stream for each signal type upon explicit request + - **_this is not done eagerly since the customer may want to change some template index settings_** before generating the default indices +- Publish a versioned schema file (Json Schema) for each signal type for general validation usage by any 3rd party + +### Note +It is important to mention here that these new capabilities would not change or prevent existing customer usage of the system and continue to allow proprietary usage. + + +### In details +*Logs Schema* +Default Generated index pattern name: *logs-default-namespace* +see - https://github.com/opensearch-project/observability/pull/1403 + +*Traces Schema* +Default Generated index pattern name: *traces-default-namespace* +see - https://github.com/opensearch-project/observability/pull/1395 + +*Metrics Schema* +Default Generated index pattern name: *metrics-default-namespace* +see - https://github.com/opensearch-project/observability/pull/1397 + +--- + +**What alternatives have you considered?** +A clear and concise description of any alternative solutions or features you've considered. + +## Note +Important to mention here that this new suggestion would not change or prevent existing customer usage of the system and continue to allow proprietary usage. + +**Do you have any additional context?** +see https://github.com/opensearch-project/OpenSearch-Dashboards/issues/3412 +see https://opensearch.org/docs/latest/opensearch/data-streams/ +see https://github.com/opensearch-project/data-prepper \ No newline at end of file diff --git a/docs/schema/security/README.md b/docs/schema/security/README.md deleted file mode 100644 index 1695a2edc..000000000 --- a/docs/schema/security/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Security Domain Schema - -OpenSearch Security is a [plugin](https://github.com/opensearch-project/security) for OpenSearch that offers encryption, authentication and authorization. When combined with OpenSearch Security-Advanced Modules, it supports authentication via Active Directory, LDAP, Kerberos, JSON web tokens, SAML, OpenID and more. It includes fine grained role-based access control to indices, documents and fields. It also provides multi-tenancy support in OpenSearch Dashboards.