From 798f2d36ffa5c351f54d34349093bda4d33c9b22 Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Thu, 11 Jul 2024 14:19:18 +0100 Subject: [PATCH] Add support for some content scanner endpoints. (#45) * Add support for some content scanner endpoints. * fix tests * fix empty media id being allowed --- src/MatrixClient.ts | 43 +++++++++++++------- src/MatrixContentScannerClient.ts | 66 +++++++++++++++++++++++++++++++ src/e2ee/CryptoClient.ts | 4 +- src/http.ts | 17 +++++++- src/index.ts | 2 + src/models/MXCUrl.ts | 22 +++++++++++ 6 files changed, 136 insertions(+), 18 deletions(-) create mode 100644 src/MatrixContentScannerClient.ts create mode 100644 src/models/MXCUrl.ts diff --git a/src/MatrixClient.ts b/src/MatrixClient.ts index ddba1827..84db1ff6 100644 --- a/src/MatrixClient.ts +++ b/src/MatrixClient.ts @@ -21,7 +21,7 @@ import { PowerLevelBounds } from "./models/PowerLevelBounds"; import { EventKind } from "./models/events/EventKind"; import { IdentityClient } from "./identity/IdentityClient"; import { OpenIDConnectToken } from "./models/OpenIDConnect"; -import { doHttpRequest } from "./http"; +import { doHttpRequest, DoHttpRequestOpts } from "./http"; import { Space, SpaceCreateOptions } from "./models/Spaces"; import { PowerLevelAction } from "./models/PowerLevelAction"; import { CryptoClient } from "./e2ee/CryptoClient"; @@ -46,6 +46,8 @@ import { RoomCreateOptions } from "./models/CreateRoom"; import { PresenceState } from './models/events/PresenceEvent'; import { IKeyBackupInfo, IKeyBackupInfoRetrieved, IKeyBackupInfoUnsigned, IKeyBackupInfoUpdate, IKeyBackupVersion, KeyBackupVersion } from "./models/KeyBackup"; import { MatrixError } from "./models/MatrixError"; +import { MXCUrl } from "./models/MXCUrl"; +import { MatrixContentScannerClient } from "./MatrixContentScannerClient"; const SYNC_BACKOFF_MIN_MS = 5000; const SYNC_BACKOFF_MAX_MS = 15000; @@ -79,6 +81,13 @@ export class MatrixClient extends EventEmitter { */ public readonly crypto: CryptoClient; + /** + * The Content Scanner API instance for this client. This is set if `opts.enableContentScanner` + * is true. The `downloadContent` and `crypto.decryptMedia` methods automatically go via + * the content scanner when this is set. + */ + public readonly contentScannerInstance?: MatrixContentScannerClient; + /** * The DM manager instance for this client. */ @@ -94,7 +103,7 @@ export class MatrixClient extends EventEmitter { private filterId = 0; private stopSyncing = false; private metricsInstance: Metrics = new Metrics(); - private unstableApisInstance = new UnstableApis(this); + private readonly unstableApisInstance = new UnstableApis(this); private cachedVersions: ServerVersions; private versionsLastFetched = 0; @@ -118,6 +127,7 @@ export class MatrixClient extends EventEmitter { public readonly accessToken: string, private storage: IStorageProvider = null, public readonly cryptoStore: ICryptoStorageProvider = null, + opts: { enableContentScanner?: boolean } = {}, ) { super(); @@ -149,6 +159,10 @@ export class MatrixClient extends EventEmitter { if (!this.storage) this.storage = new MemoryStorageProvider(); this.dms = new DMs(this); + + if (opts.enableContentScanner) { + this.contentScannerInstance = new MatrixContentScannerClient(this); + } } /** @@ -1587,11 +1601,8 @@ export class MatrixClient extends EventEmitter { * @returns {string} The HTTP URL for the content. */ public mxcToHttp(mxc: string): string { - if (!mxc.startsWith("mxc://")) throw new Error("Not a MXC URI"); - const parts = mxc.substring("mxc://".length).split('/'); - const originHomeserver = parts[0]; - const mediaId = parts.slice(1, parts.length).join('/'); - return `${this.homeserverUrl}/_matrix/media/v3/download/${encodeURIComponent(originHomeserver)}/${encodeURIComponent(mediaId)}`; + const { domain, mediaId } = MXCUrl.parse(mxc); + return `${this.homeserverUrl}/_matrix/media/v3/download/${encodeURIComponent(domain)}/${encodeURIComponent(mediaId)}`; } /** @@ -1633,13 +1644,11 @@ export class MatrixClient extends EventEmitter { * @returns {Promise<{data: Buffer, contentType: string}>} Resolves to the downloaded content. */ public async downloadContent(mxcUrl: string, allowRemote = true): Promise<{ data: Buffer, contentType: string }> { - if (!mxcUrl.toLowerCase().startsWith("mxc://")) { - throw Error("'mxcUrl' does not begin with mxc://"); + if (this.contentScannerInstance) { + return this.contentScannerInstance.downloadContent(mxcUrl, allowRemote); } - const urlParts = mxcUrl.substr("mxc://".length).split("/"); - const domain = encodeURIComponent(urlParts[0]); - const mediaId = encodeURIComponent(urlParts[1].split("/")[0]); - const path = `/_matrix/media/v3/download/${domain}/${mediaId}`; + const { domain, mediaId } = MXCUrl.parse(mxcUrl); + const path = `/_matrix/media/v3/download/${encodeURIComponent(domain)}/${encodeURIComponent(mediaId)}`; const res = await this.doRequest("GET", path, { allow_remote: allowRemote }, null, null, true, null, true); return { data: res.body, @@ -2095,7 +2104,8 @@ export class MatrixClient extends EventEmitter { * @returns {Promise} Resolves to the response (body), rejected if a non-2xx status code was returned. */ @timedMatrixClientFunctionCall() - public doRequest(method, endpoint, qs = null, body = null, timeout = 60000, raw = false, contentType = "application/json", noEncoding = false): Promise { + public doRequest(method, endpoint, qs = null, body = null, timeout = 60000, raw = false, + contentType = "application/json", noEncoding = false, opts?: DoHttpRequestOpts): Promise { if (this.impersonatedUserId) { if (!qs) qs = { "user_id": this.impersonatedUserId }; else qs["user_id"] = this.impersonatedUserId; @@ -2108,7 +2118,10 @@ export class MatrixClient extends EventEmitter { if (this.accessToken) { headers["Authorization"] = `Bearer ${this.accessToken}`; } - return doHttpRequest(this.homeserverUrl, method, endpoint, qs, body, headers, timeout, raw, contentType, noEncoding); + return doHttpRequest( + this.homeserverUrl, method, endpoint, qs, body, headers, + timeout, raw, contentType, noEncoding, opts, + ); } } diff --git a/src/MatrixContentScannerClient.ts b/src/MatrixContentScannerClient.ts new file mode 100644 index 00000000..d1288c3c --- /dev/null +++ b/src/MatrixContentScannerClient.ts @@ -0,0 +1,66 @@ +import { EncryptedFile, MatrixClient } from "."; +import { MXCUrl } from "./models/MXCUrl"; + +export interface ContentScannerResult { + info: string; + clean: boolean; +} +export interface ContentScannerErrorResult { + info: string; + reason: string; +} + +export class MatrixContentScannerError extends Error { + constructor(public readonly body: ContentScannerErrorResult) { + super(`Encountered error scanning content (${body.reason}): ${body.info}`); + } +} + +const errorHandler = (_response, errBody) => { + return typeof (errBody) === "object" && 'reason' in errBody ? + new MatrixContentScannerError(errBody as ContentScannerErrorResult) : undefined; +}; + +/** + * API client for https://github.com/element-hq/matrix-content-scanner-python. + */ +export class MatrixContentScannerClient { + constructor(public readonly client: MatrixClient) { + + } + + public async scanContent(mxcUrl: string): Promise { + const { domain, mediaId } = MXCUrl.parse(mxcUrl); + const path = `/_matrix/media_proxy/unstable/scan/${domain}/${mediaId}`; + const res = await this.client.doRequest("GET", path, null, null, null, false, null, false, { errorHandler }); + return res; + } + + public async scanContentEncrypted(file: EncryptedFile): Promise { + // Sanity check. + MXCUrl.parse(file.url); + const path = `/_matrix/media_proxy/unstable/scan_encrypted`; + const res = await this.client.doRequest("POST", path, null, { file }, null, false, null, false, { errorHandler }); + return res; + } + + public async downloadContent(mxcUrl: string, allowRemote = true): ReturnType { + const { domain, mediaId } = MXCUrl.parse(mxcUrl); + const path = `/_matrix/media_proxy/unstable/download/${encodeURIComponent(domain)}/${encodeURIComponent(mediaId)}`; + const res = await this.client.doRequest("GET", path, null, null, null, true, null, true, { errorHandler }); + return { + data: res.body, + contentType: res.headers["content-type"], + }; + } + + public async downloadEncryptedContent(file: EncryptedFile): Promise { + // Sanity check. + MXCUrl.parse(file.url); + const path = `/_matrix/media_proxy/unstable/download_encrypted`; + const res = await this.client.doRequest("POST", path, undefined, { + file, + }, null, true, null, true, { errorHandler }); + return res.data; + } +} diff --git a/src/e2ee/CryptoClient.ts b/src/e2ee/CryptoClient.ts index be8b789d..3b448215 100644 --- a/src/e2ee/CryptoClient.ts +++ b/src/e2ee/CryptoClient.ts @@ -290,7 +290,9 @@ export class CryptoClient { */ @requiresReady() public async decryptMedia(file: EncryptedFile): Promise { - const contents = (await this.client.downloadContent(file.url)).data; + const contents = this.client.contentScannerInstance ? + await this.client.contentScannerInstance.downloadEncryptedContent(file) : + (await this.client.downloadContent(file.url)).data; const encrypted = new EncryptedAttachment( contents, JSON.stringify(file), diff --git a/src/http.ts b/src/http.ts index 20a3b82b..9f023406 100644 --- a/src/http.ts +++ b/src/http.ts @@ -4,6 +4,15 @@ import { MatrixError } from "./models/MatrixError"; let lastRequestId = 0; +const defaultErrorHandler = (response, errBody) => { + return typeof (errBody) === "object" && 'errcode' in errBody ? + new MatrixError(errBody, response.statusCode, response.headers) : undefined; +}; + +export interface DoHttpRequestOpts { + errorHandler?: (response, body) => Error|undefined; +} + /** * Performs a web request to a server. * @category Unit testing @@ -30,6 +39,9 @@ export async function doHttpRequest( raw = false, contentType = "application/json", noEncoding = false, + opts: DoHttpRequestOpts = { + errorHandler: defaultErrorHandler, + }, ): Promise { if (!endpoint.startsWith('/')) { endpoint = '/' + endpoint; @@ -104,10 +116,11 @@ export async function doHttpRequest( // Check for errors. const errBody = response.body || resBody; - if (typeof (errBody) === "object" && 'errcode' in errBody) { + const handledError = opts.errorHandler(response, errBody); + if (handledError) { const redactedBody = respIsBuffer ? '' : redactObjectForLogging(errBody); LogService.error("MatrixHttpClient", "(REQ-" + requestId + ")", redactedBody); - throw new MatrixError(errBody, response.statusCode, response.headers); + throw handledError; } // Don't log the body unless we're in debug mode. They can be large. diff --git a/src/index.ts b/src/index.ts index 56afd3d0..3d5ab340 100644 --- a/src/index.ts +++ b/src/index.ts @@ -55,6 +55,7 @@ export * from "./models/PowerLevelAction"; export * from "./models/ServerVersions"; export * from "./models/MatrixError"; export * from "./models/CreateRoom"; +export * from "./models/MXCUrl"; // Unstable models export * from "./models/unstable/MediaInfo"; @@ -112,6 +113,7 @@ export * from "./request"; export * from "./PantalaimonClient"; export * from "./SynchronousMatrixClient"; export * from "./SynapseAdminApis"; +export * from "./MatrixContentScannerClient"; export * from "./simple-validation"; export * from "./b64"; export * from "./http"; diff --git a/src/models/MXCUrl.ts b/src/models/MXCUrl.ts new file mode 100644 index 00000000..b774ea8c --- /dev/null +++ b/src/models/MXCUrl.ts @@ -0,0 +1,22 @@ +export class MXCUrl { + static parse(mxcUrl: string): MXCUrl { + if (!mxcUrl?.toLowerCase()?.startsWith("mxc://")) { + throw Error("Not a MXC URI"); + } + const [domain, ...mediaIdParts] = mxcUrl.slice("mxc://".length).split("/"); + if (!domain) { + throw Error("missing domain component"); + } + const mediaId = mediaIdParts?.join('/') ?? undefined; + if (!mediaId) { + throw Error("missing mediaId component"); + } + return new MXCUrl(domain, mediaId); + } + + constructor(public domain: string, public mediaId: string) { } + + public toString() { + return `mxc://${this.domain}/${this.mediaId}`; + } +}