Skip to content

Commit

Permalink
HEIC/HEIF image converter lambda (#106)
Browse files Browse the repository at this point in the history
* Add image converter lambda js file

* Update to add delete of original file, some checks for file responses, and comments

* add image converter lambda name to main.tf script for the wfdm file index initializer, add a call to the file index initializer to call the image conversion lambda for heic files

* Remove commented out code
  • Loading branch information
CEBergin-Vivid authored Apr 11, 2024
1 parent dac5ecb commit ad64a64
Show file tree
Hide file tree
Showing 4 changed files with 286 additions and 21 deletions.
1 change: 1 addition & 0 deletions terraform/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,7 @@ resource "aws_lambda_function" "terraform_indexing_initializer_function" {
WFDM_DOCUMENT_CLAMAV_S3BUCKET = data.aws_s3_bucket.clamav-bucket.bucket
WFDM_DOCUMENT_TOKEN_URL = "${var.document_token_url}"
WFDM_INDEXING_LAMBDA_NAME = aws_lambda_function.terraform_wfdm_indexing_function.function_name
WFDM_IMAGE_CONVERTER_LAMBDA_NAME = "WF1-WFDM-image-converter-${var.env}"
WFDM_DOCUMENT_SECRET_MANAGER = "${var.secret_manager_name}"
WFDM_DOCUMENT_FILE_SIZE_SCAN_LIMIT = "${var.file_scan_size_limit}"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,37 +117,56 @@ public SQSBatchResponse handleRequest(SQSEvent sqsEvent, Context context) {
if (fileDetailsJson.has("mimeType") ){
mimeType = fileDetailsJson.get("mimeType").toString();
} else {
mimeType = "";
mimeType = "";
}

String fileExtension;
if (fileDetailsJson.has("fileExtension")) {
fileExtension = fileDetailsJson.get("fileExtension").toString().toUpperCase();
} else {
fileExtension = "";
}

// if a file has a heic or heif mimetype it needs to be converted by the image
// conversion lambda rather than processed
if (fileExtension.equals("HEIC") || fileExtension.equals("HEIF") ) {
logger.log("\nInfo: File with mimeType of " + mimeType + " calling image conversion lambda");
AWSLambda client = AWSLambdaAsyncClient.builder().withRegion(region).build();
InvokeRequest request = new InvokeRequest();
request.withFunctionName(System.getenv("WFDM_IMAGE_CONVERTER_LAMBDA_NAME").trim()).withPayload(fileDetailsJson.toString());
InvokeResult invoke = client.invoke(request);

} else {

// Check the event type. If this is a BYTES event, write the bytes
// otherwise, handle meta only and skip clam scan.
if (eventType.equalsIgnoreCase("bytes")) {
// Check the event type. If this is a BYTES event, write the bytes
// otherwise, handle meta only and skip clam scan.
if (eventType.equalsIgnoreCase("bytes")) {
logger.log("\nInfo: File found on WFDM: " + fileInfo);
// Update Virus scan metadata
// Note, current user likely lacks access to update metadata so we'll need to update webade
boolean metaAdded = GetFileFromWFDMAPI.setVirusScanMetadata(wfdmToken, fileId, versionNumber, fileDetailsJson);
boolean metaAdded = GetFileFromWFDMAPI.setVirusScanMetadata(wfdmToken, fileId, versionNumber,
fileDetailsJson);
if (!metaAdded) {
// We failed to apply the metadata regarding the virus scan status...
// Should we continue to process the data from this point, or just choke?
logger.log("\nERROR: Failed to add metadata to file resource");
}

AmazonS3 s3client = AmazonS3ClientBuilder
.standard()
.withCredentials(credentialsProvider)
.withRegion(region)
.build();
.standard()
.withCredentials(credentialsProvider)
.withRegion(region)
.build();

Bucket clamavBucket = null;
List<Bucket> buckets = s3client.listBuckets();
for(Bucket bucket : buckets) {
for (Bucket bucket : buckets) {
if (bucket.getName().equalsIgnoreCase(bucketName)) {
clamavBucket = bucket;
}
}

if(clamavBucket == null) {
if (clamavBucket == null) {
throw new Exception("S3 Bucket " + bucketName + " does not exist.");
}

Expand All @@ -158,22 +177,26 @@ public SQSBatchResponse handleRequest(SQSEvent sqsEvent, Context context) {
meta.setContentLength(Long.parseLong(fileDetailsJson.get("fileSize").toString()));
meta.addUserMetadata("title", fileId + "-" + versionNumber);
logger.log("putting into s3 bucket");
s3client.putObject(new PutObjectRequest(clamavBucket.getName(), fileDetailsJson.get("fileId").toString() + "-" + versionNumber, stream, meta));
s3client.putObject(new PutObjectRequest(clamavBucket.getName(),
fileDetailsJson.get("fileId").toString() + "-" + versionNumber, stream, meta));
}
//handling to allow folders to be added to opensearch bypassing the clamAv scan and sending them directly to the file index service
else if (eventType.equalsIgnoreCase("meta") && (fileDetailsJson.get("mimeType").toString().equals("null"))) {
else if (eventType.equalsIgnoreCase("meta") && (fileDetailsJson.get("mimeType").toString().equals("null"))) {
AWSLambda client = AWSLambdaAsyncClient.builder().withRegion(region).build();
InvokeRequest request = new InvokeRequest();
request.withFunctionName(System.getenv("WFDM_INDEXING_LAMBDA_NAME").trim()).withPayload(fileDetailsJson.toString());
request.withFunctionName(System.getenv("WFDM_INDEXING_LAMBDA_NAME").trim())
.withPayload(fileDetailsJson.toString());
InvokeResult invoke = client.invoke(request);

} else {
// Meta only update, so fire a message to the Indexer Lambda
logger.log("Calling lambda name: "+System.getenv("WFDM_INDEXING_LAMBDA_NAME").trim()+" lambda: "+messageBody);
AWSLambda client = AWSLambdaAsyncClient.builder().withRegion(region).build();
InvokeRequest request = new InvokeRequest();
request.withFunctionName(System.getenv("WFDM_INDEXING_LAMBDA_NAME").trim()).withPayload(messageBody);
InvokeResult invoke = client.invoke(request);
} else {
// Meta only update, so fire a message to the Indexer Lambda
logger.log("Calling lambda name: " + System.getenv("WFDM_INDEXING_LAMBDA_NAME").trim() + " lambda: "
+ messageBody);
AWSLambda client = AWSLambdaAsyncClient.builder().withRegion(region).build();
InvokeRequest request = new InvokeRequest();
request.withFunctionName(System.getenv("WFDM_INDEXING_LAMBDA_NAME").trim()).withPayload(messageBody);
InvokeResult invoke = client.invoke(request);
}
}
}
} catch (UnirestException | TransformerConfigurationException | SAXException e) {
Expand Down
229 changes: 229 additions & 0 deletions wfdm-image-converter/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
const fs = require('fs');
const path = require('path');
const { pipeline } = require('stream/promises');
const { promisify } = require('util');

const Axios = require('axios');
const FormData = require('form-data');
const convert = require('heic-convert');

// This Lambda takes the fileId of a HEIC or HEIF file that the API has queued for conversion.
// It downloads that image from the API and saves it to the Lambda's tmp folder,
// converts it to JPG (also written to the tmp folder),
// and sends the converted file to the WFDM API as a new image.
// Once that upload is successful, the original image is deleted.


exports.handler = async (event) => {
try {

let fileId;
let wfdmApi = process.env.wfdmApi
let apiURL = wfdmApi + "documents/";


let tokenService = process.env.tokenService
let clientName = process.env.clientName
let clientSecret = process.env.clientSecret


for (let { messageId, body } of event.Records) {
console.log('SQS message %s: %j', messageId, body);
console.log(body);
let jsonBody = JSON.stringify(body)
jsonBody = jsonBody.split(':')
jsonBody = jsonBody[1].split(',')
jsonBody = jsonBody[0].replaceAll("'", "")
console.log("json body after stringify" + jsonBody)
fileId = jsonBody;
}


const encoded = Buffer.from(clientName + ':' + clientSecret).toString('base64');

let tokenConfig = {
method: 'get',
maxBodyLength: Infinity,
url: tokenService,
headers: {
'Authorization': 'Basic ' + encoded
}
};
let bearerToken
let bearerTokenResponse = await Axios.request(tokenConfig)
.then((response) => {
return response;
})
.catch((error) => {
console.log(error);
return error;
});

if (bearerTokenResponse.status !== 200) {
console.log("no bearer token found");
} else {
bearerToken = bearerTokenResponse.data.access_token

let fileInfo = await Axios.get(apiURL + fileId, {
headers: {
'content-type': 'multipart/form-data',
'Authorization': 'Bearer ' + bearerToken
}
})
if (fileInfo.status !== 200) {
console.log("file was not retrieved with fileId: " + fileId)
} else {
//find the new file to be used
let fileName = path.basename(fileInfo.data.filePath).split('.')[0] + ".jpg";

let parentFileId = fileInfo.data.parent.fileId;

//save the image to be converted to the temp folder
await downloadImage(apiURL + fileId + "/bytes?", "/tmp/" + fileId, bearerToken);

const inputBuffer = await promisify(fs.readFile)("/tmp/" + fileId);

// convert the image to jpg
const outputBuffer = await convert({
buffer: inputBuffer,
format: 'JPEG',
quality: 1
});

// write the converted image to the tmp folder
await promisify(fs.writeFile)("/tmp/" + fileName, outputBuffer);

let stats = fs.statSync("/tmp/" + fileName);
let fileSizeInBytes = stats.size;

// create the json data that a file is created with
let jsonData = {
"@type": "http://resources.wfdm.nrs.gov.bc.ca/fileDetails",
"type": "http://resources.wfdm.nrs.gov.bc.ca/fileDetails",
"parent": {
"@type": "http://resources.wfdm.nrs.gov.bc.ca/file",
"type": "http://resources.wfdm.nrs.gov.bc.ca/file",
"fileId": parentFileId
},
"fileSize": fileSizeInBytes,
"fileType": "DOCUMENT",
"filePath": fileName,
"security": [],
"metadata": [],
"fileCheckout": null,
"lockedInd": null,
"uploadedOnTimestamp": null
}

let jsonDataString = JSON.stringify(jsonData);

let jsonDataFilePath = "/tmp/fileInfo" + fileName + ".json";

//write the json file to temp so it can be sent along with the image
await promisify(fs.writeFile)(jsonDataFilePath, jsonDataString);

// send the new image with it's json data back to the wfdm api
let postImageResponse = await postImage(apiURL, "/tmp/" + fileName, jsonDataFilePath, bearerToken);

if (postImageResponse.status !== 201) {
console.log("failed to write image")
} else {
//remove files from temp folder once they've been uploaded back to API
fs.unlinkSync("/tmp/" + fileId);
fs.unlinkSync("/tmp/" + fileName);
fs.unlinkSync(jsonDataFilePath);

// with the converted succesfully converted, the original can be deleted
let deleteResponse = await deleteOriginalImage(apiURL, bearerToken, fileId);

if (deleteResponse !== '') {
console.log("failed to delete original image with fileId: " + fileId)
} else {
console.log("deleted original image with fileId: " + fileId)
}
}
}
}
} catch (error) {
console.log(error)
fs.unlinkSync("/tmp/" + fileId);
fs.unlinkSync("/tmp/" + fileName);
fs.unlinkSync(jsonDataFilePath);
}


}


async function deleteOriginalImage(url, bearerToken, fileId) {

const axios = require('axios');

let config = {
method: 'delete',
maxBodyLength: Infinity,
url: url + fileId,
headers: {
'Authorization': 'Bearer ' + bearerToken
}
};

return Axios.request(config)
.then((response) => {
console.log(JSON.stringify(response.data));
return response.data
})
.catch((error) => {
console.log(error);
return error;
});

}

async function postImage(url, filePath, jsonDataFilePath, bearerToken) {
try {
let data = new FormData();
data.append('file', fs.createReadStream(filePath));
data.append('resource', fs.createReadStream(jsonDataFilePath));

let config = {
method: 'post',
maxBodyLength: Infinity,
url: url,
headers: {
'Content-Type': 'application/json',
'Authorization': 'Bearer ' + bearerToken,
...data.getHeaders()
},
data: data
};

return await Axios.request(config)
.then((response) => {
console.log(JSON.stringify(response.data));
return response
})
.catch((error) => {
console.log(error);
return error
});
} catch (error) {
console.error(error);
return error
}
};

async function downloadImage(url, filepath, bearerToken) {
const response = await Axios({
url,
method: 'GET',
responseType: 'stream',
headers: {
'Content-Type': 'application/octet-stream',
'Authorization': `Bearer ` + bearerToken
},
});
return new Promise((resolve, reject) => {
response.data.pipe(fs.createWriteStream(filepath))
.on('error', reject)
.once('close', () => resolve(filepath));
});
}
12 changes: 12 additions & 0 deletions wfdm-image-converter/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"version": "1.0.0",
"build-date": "2024-04-08",
"name": "image-converter",
"title": "Heic Image Converter",
"repository": "github:bcgov/nr-bcws-opensearch",
"dependencies": {
"heic-convert": "2.1.0",
"axios": "1.6.8",
"form-data": "4.0.0"
}
}

0 comments on commit ad64a64

Please sign in to comment.