Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add ARM64 support #112

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,15 @@ jobs:
run: npm run test:integration:py38
- name: Run integration test against Runtime NodeJS 16
run: npm run test:integration:node16
# CI job that builds the layer image from Dockerfile.al2 for the linux/arm64 platform.
arm64:
name: AARCH64
runs-on: ubuntu-latest
defaults:
run:
working-directory: './'
steps:
- uses: actions/checkout@v2
# The docker build below is passed as the `commands` input of pguyot/arm-runner-action.
- uses: pguyot/arm-runner-action@v2
with:
commands: |
docker build --platform linux/arm64 -t tesseract-lambda-layer -f Dockerfile.al2 .
33 changes: 33 additions & 0 deletions AARCH64.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
qemu-user-static [1]

## How-To

**Set up the machine to be able to build multi-architecture images**

Download qemu binaries and register binfmt_misc entries:

```shell
# check host architecture (only x86_64 supported atm)
uname -m
> x86_64
# enable the execution of different multi-architecture containers by QEMU
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
```

**Build images for amd64 and aarch64**

```shell
docker buildx build --platform linux/amd64,linux/arm64 -t tesseract-lambda-layer -f Dockerfile.al2 .
```

```shell
export CONTAINER=$(docker run -d tesseract-lambda-layer-aarch64 false)
docker cp $CONTAINER:/opt/build-dist ready-to-use/amazonlinux-2-aarch64
docker rm $CONTAINER
unset CONTAINER
```


---

[1]: https://github.com/multiarch/qemu-user-static
8 changes: 4 additions & 4 deletions Dockerfile.al2
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## Builds binaries for Amazonlinux 2
FROM lambci/lambda-base-2:build
FROM public.ecr.aws/sam/build-provided.al2:1.62

ARG LEPTONICA_VERSION=1.82.0
ARG TESSERACT_VERSION=5.2.0
Expand All @@ -17,7 +17,7 @@ ARG TESSERACT_DATA_VERSION=4.1.0
RUN yum makecache fast; yum clean all && yum -y update && yum -y upgrade; yum clean all && \
yum install -y yum-plugin-ovl; yum clean all && yum -y groupinstall "Development Tools"; yum clean all

RUN yum -y install gcc gcc-c++ make autoconf aclocal automake libtool \
RUN yum -y install clang gcc-c++ make autoconf aclocal automake libtool \
libjpeg-devel libpng-devel libtiff-devel zlib-devel \
libzip-devel freetype-devel lcms2-devel libwebp-devel \
libicu-devel tcl-devel tk-devel pango-devel cairo-devel; yum clean all
Expand All @@ -26,7 +26,7 @@ WORKDIR ${TMP_BUILD}/leptonica-build
RUN curl -L https://github.com/DanBloomberg/leptonica/releases/download/${LEPTONICA_VERSION}/leptonica-${LEPTONICA_VERSION}.tar.gz | tar xz && cd ${TMP_BUILD}/leptonica-build/leptonica-${LEPTONICA_VERSION} && \
./configure --prefix=${LEPTONICA} && make && make install && cp -r ./src/.libs /opt/liblept

RUN echo "/opt/leptonica/lib" > /etc/ld.so.conf.d/leptonica.conf && ldconfig
RUN echo "/opt/leptonica/lib" > /etc/ld.so.conf.d/leptonica.conf && /usr/sbin/ldconfig

WORKDIR ${TMP_BUILD}/autoconf-build
RUN curl https://ftp.gnu.org/gnu/autoconf-archive/autoconf-archive-${AUTOCONF_ARCHIVE_VERSION}.tar.xz | tar xJ && \
Expand All @@ -35,7 +35,7 @@ RUN curl https://ftp.gnu.org/gnu/autoconf-archive/autoconf-archive-${AUTOCONF_AR
WORKDIR ${TMP_BUILD}/tesseract-build
RUN curl -L https://github.com/tesseract-ocr/tesseract/archive/${TESSERACT_VERSION}.tar.gz | tar xz && \
cd tesseract-${TESSERACT_VERSION} && ./autogen.sh && PKG_CONFIG_PATH=/opt/leptonica/lib/pkgconfig LIBLEPT_HEADERSDIR=/opt/leptonica/include \
./configure --prefix=${TESSERACT} --with-extra-includes=/opt/leptonica/include --with-extra-libraries=/opt/leptonica/lib 'CXXFLAGS=-mavx2' && make && make install
./configure --prefix=${TESSERACT} --with-extra-includes=/opt/leptonica/include --with-extra-libraries=/opt/leptonica/lib && make && make install

WORKDIR /opt
RUN mkdir -p ${DIST}/lib && mkdir -p ${DIST}/bin && \
Expand Down
75 changes: 75 additions & 0 deletions continous-integration/index-aarch64.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import * as lambda from 'aws-cdk-lib/aws-lambda';
import * as nodelambda from 'aws-cdk-lib/aws-lambda-nodejs';
import { Architecture, CfnLayerVersion, Code, Runtime } from 'aws-cdk-lib/aws-lambda';
import * as path from 'path';
import { App, DockerImage, Duration, Stack } from 'aws-cdk-lib';


// CDK app and the single stack ('tesseract-lambda-ci') that holds the aarch64 test resources.
const app = new App();
const stack = new Stack(app, 'tesseract-lambda-ci');
// Parent directory of this file; used both as the asset source and as the Docker build context.
const pathToLayerSource = path.resolve(__dirname, '..');
/**
* Test setup and artifacts for AL 2 (aarch64 variant).
*
* The layer content comes from an image built from `Dockerfile.al2` for the
* `linux/arm64` platform; the bundling command copies the contents of
* `/opt/build-dist` from that image into `/asset-output/`.
*/
const al2Layer = new lambda.LayerVersion(stack, 'al2-layer', {
code: Code.fromAsset(pathToLayerSource, {
bundling: {
image: DockerImage.fromBuild(pathToLayerSource, { file: 'Dockerfile.al2', platform: 'linux/arm64' }),
command: ['/bin/bash', '-c', 'cp -r /opt/build-dist/. /asset-output/'],
},
}),
description: 'AL2 Tesseract Layer',
});
// Replace the generated logical ID of the underlying CfnLayerVersion with the fixed name 'al2layer',
// so the layer resource can be addressed by a stable key in the synthesized template.
stack.renameLogicalId(stack.getLogicalId(al2Layer.node.defaultChild as CfnLayerVersion), 'al2layer')

// Python 3.8 test function on ARM64 that uses the Tesseract layer.
// The handler asset is bundled inside the SAM python3.8 build image: dependencies from
// requirements.txt are installed into /asset-output/, then the sample image (faust.png)
// and handler.py are copied next to them.
new lambda.Function(stack, 'python3.8', {
code: lambda.Code.fromAsset(path.resolve(__dirname, 'lambda-handlers/py38'),
{
bundling: {
image: DockerImage.fromRegistry('public.ecr.aws/sam/build-python3.8:latest'),
// The individual steps are joined with ' && ' so a failing step aborts the bundling.
command: ['/bin/bash', '-c', [
'pip install -r requirements.txt -t /asset-output/',
'cp faust.png /asset-output',
'cp handler.py /asset-output',
].join(' && ')],
}
}),
runtime: Runtime.PYTHON_3_8,
architecture: Architecture.ARM_64,
layers: [al2Layer],
// Fixed function name (rather than a generated one).
// NOTE(review): presumably the name the integration test invokes — confirm against the test scripts.
functionName: `py38`,
memorySize: 512,
timeout: Duration.seconds(30),
handler: 'handler.main',
});


// Node.js 16 test function on ARM64 that uses the Tesseract layer.
new nodelambda.NodejsFunction(stack, 'node16', {
bundling: {
// NOTE(review): per the NodejsFunction docs, modules listed here are installed into the
// bundle's node_modules instead of being bundled — confirm this is intended for 'tesseractocr'.
nodeModules: ['tesseractocr'],
// All three hooks are supplied; only beforeBundling contributes a command.
commandHooks: {
beforeInstall() {
return [];
},
// Copies the sample image (faust.png) from the input directory into the bundling output directory.
beforeBundling(inputDir: string, outputDir: string): string[] {
return [
`cp ${inputDir}/faust.png ${outputDir}`,
];
},
afterBundling(inputDir: string, outputDir: string): string[] {
return [];
},
},
},
depsLockFilePath: path.resolve(__dirname, 'lambda-handlers/node16/package-lock.json'),

runtime: Runtime.NODEJS_16_X,
entry: path.resolve(__dirname, 'lambda-handlers/node16/index.js'),
architecture: Architecture.ARM_64,
layers: [al2Layer],
// Fixed function name (rather than a generated one).
// NOTE(review): presumably the name the integration test invokes — confirm against the test scripts.
functionName: `node16`,
memorySize: 512,
timeout: Duration.seconds(30),
// NOTE(review): presumably the name of the function exported by index.js — confirm.
handler: 'handler',
});

3 changes: 2 additions & 1 deletion continous-integration/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
"url": "https://github.com/bweigel/aws-lambda-tesseract-layer.git"
},
"scripts": {
"synth": "npx cdk synth --app \"npx ts-node index-all.ts\"",
"synth": "npx cdk synth --app \"npx ts-node index.ts\"",
"synth:aarch64": "npx cdk synth --app \"npx ts-node index-aarch64.ts\"",
"test:integration:py38": "sam local invoke -t cdk.out/tesseract-lambda-ci.template.json py38 --no-event > py38-test-output.txt && cat py38-test-output.txt | grep -Eiv \"(fail|error|exception)\"",
"test:integration:node16": "sam local invoke -t cdk.out/tesseract-lambda-ci.template.json node16 --no-event > node16-test-output.txt && cat node16-test-output.txt | grep -Eiv \"(fail|error|exception)\"",
"bundle:binary": "cp -r cdk.out/$(cat cdk.out/tesseract-lambda-ci.template.json | jq -r '.Resources.al2layer.Metadata.\"aws:asset:path\"')/. ../ready-to-use/amazonlinux-2"
Expand Down
54 changes: 35 additions & 19 deletions example/cdk/index.ts
Original file line number Diff line number Diff line change
@@ -1,41 +1,57 @@
import * as lambda from '@aws-cdk/aws-lambda';
import { Code, Runtime } from '@aws-cdk/aws-lambda';
import { RestApi, LambdaIntegration } from '@aws-cdk/aws-apigateway';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import { Architecture, Code, Runtime } from 'aws-cdk-lib/aws-lambda';
import { RestApi, LambdaIntegration } from 'aws-cdk-lib/aws-apigateway';
import * as path from 'path';
import { App, BundlingDockerImage, Duration, Stack } from '@aws-cdk/core';
import { App, DockerImage, Duration, Stack } from 'aws-cdk-lib';


const app = new App();
const stack = new Stack(app, 'tesseract-ocr-example-cdk-py38');
const stack = new Stack(app, 'tesseract-ocr-example-cdk-py38', {tags: {'owner': 'bgenz'}});

/**
* Artifacts for AL 2
*/
const al2Layer = new lambda.LayerVersion(stack, 'al2-layer', {
const amdLayer = new lambda.LayerVersion(stack, 'amd-layer', {
code: Code.fromAsset(path.resolve(__dirname, '../../ready-to-use/amazonlinux-2')),
description: 'AL2 Tesseract Layer',
description: 'AL2 Tesseract Layer - AMD64',
});
const aarchLayer = new lambda.LayerVersion(stack, 'aarch-layer', {
code: Code.fromAsset(path.resolve(__dirname, '../../ready-to-use/amazonlinux-2-aarch64')),
description: 'AL2 Tesseract Layer - AARCH64',
});

const ocrFnAmd = new lambda.Function(stack, 'python3.8-amd', {
code: lambda.Code.fromDockerBuild(path.resolve(__dirname, 'lambda-handlers'),
{
platform: 'linux/amd64',
file: 'Dockerfile',
}),
runtime: Runtime.PYTHON_3_8,
architecture: Architecture.X86_64,
layers: [amdLayer],
memorySize: 1024,
timeout: Duration.seconds(10),
handler: 'handler.main',
});

const ocrFn = new lambda.Function(stack, 'python3.8', {
code: lambda.Code.fromAsset(path.resolve(__dirname, 'lambda-handlers'),
const ocrFnAarch = new lambda.Function(stack, 'python3.8-aarch', {
code: lambda.Code.fromDockerBuild(path.resolve(__dirname, 'lambda-handlers'),
{
bundling: {
image: BundlingDockerImage.fromRegistry('lambci/lambda:build-python3.8'),
command: ['/bin/bash', '-c', [
'pip install -r requirements.txt -t /asset-output/',
'cp handler.py /asset-output',
].join(' && ')],
}
platform: 'linux/arm64',
file: 'Dockerfile.arm',
}),
runtime: Runtime.PYTHON_3_8,
layers: [al2Layer],
architecture: Architecture.ARM_64,
layers: [aarchLayer],
memorySize: 1024,
timeout: Duration.seconds(10),
handler: 'handler.main',
});

const api = new RestApi(stack, 'ocr-api');
const ocr = api.root.addResource('ocr');
ocr.addMethod('POST', new LambdaIntegration(ocrFn, {proxy: true}));
const amd = api.root.addResource('amd');
const arm = api.root.addResource('arm');
amd.addMethod('POST', new LambdaIntegration(ocrFnAmd, {proxy: true}));
arm.addMethod('POST', new LambdaIntegration(ocrFnAarch, {proxy: true}));


7 changes: 7 additions & 0 deletions example/cdk/lambda-handlers/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Builds the Python handler asset: handler.py and its dependencies are collected in /asset.
# NOTE(review): /asset is presumably the directory the CDK Docker-build asset is read from — confirm.
FROM public.ecr.aws/sam/build-python3.8:1.62.0

WORKDIR /asset
COPY requirements.txt /asset/requirements.txt
COPY handler.py /asset/handler.py

# Install dependencies next to the handler; --only-binary=:all: restricts pip to wheels
# (no source builds) and --implementation cp selects CPython wheels.
RUN pip install -r requirements.txt -t /asset/ --only-binary=:all: --implementation cp
7 changes: 7 additions & 0 deletions example/cdk/lambda-handlers/Dockerfile.arm
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Builds the Python handler asset on the arm64 SAM build image: handler.py and its
# dependencies are collected in /asset.
# NOTE(review): /asset is presumably the directory the CDK Docker-build asset is read from — confirm.
FROM public.ecr.aws/sam/build-python3.8:1.62.0-arm64

WORKDIR /asset
COPY requirements.txt /asset/requirements.txt
COPY handler.py /asset/handler.py

# Install dependencies next to the handler; --only-binary=:all: restricts pip to wheels
# (no source builds), --implementation cp selects CPython wheels, and
# --platform manylinux2014_aarch64 requests wheels built for aarch64.
RUN pip install -r requirements.txt -t /asset/ --only-binary=:all: --implementation cp --platform manylinux2014_aarch64
Loading