Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
FROM amazonlinux:2018.03.0.20200318.1
FROM public.ecr.aws/lambda/python:3.9-x86_64

ENV LEPTONICA_VERSION="1.75.1"
ENV LEPTONICA_VERSION="1.82.0"
# docker_build leptonica: install the compiler toolchain and image-library headers needed to compile it
WORKDIR /tmp/
RUN yum install clang wget zip gzip tar autoconf xz libpng-devel libtiff-devel zlib-devel libwebp-devel libjpeg-turbo-devel make libtool pkgconfig -y
Expand All @@ -15,4 +15,4 @@ RUN cd autoconf-archive-2019.01.06 && cp m4/* /usr/share/aclocal/

COPY build_tesseract.sh /tmp/build_tesseract.sh
RUN chmod +x /tmp/build_tesseract.sh
CMD sh /tmp/build_tesseract.sh
ENTRYPOINT ["sh" ,"/tmp/build_tesseract.sh"]
10 changes: 7 additions & 3 deletions build_tesseract.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
TESSERACT_VERSION="4.1.1"
TESSERACT_VERSION="5.1.0"

# docker_build tesseract: build it, then package the binary, shared libraries and tessdata into tesseract-standalone
cd ~
Expand All @@ -18,15 +18,19 @@ mkdir tesseract-standalone
cd tesseract-standalone
cp /usr/local/bin/tesseract .
mkdir lib
cp /usr/local/lib/libtesseract.so.4 lib/
cp /usr/local/lib/libtesseract.so.5 lib/
cp /usr/local/lib/liblept.so.5 lib/
cp /usr/lib64/libjpeg.so.62 lib/
cp /usr/lib64/libwebp.so.4 lib/
cp /usr/lib64/libpng15.so.15 lib/
cp /usr/lib64/libtiff.so.5 lib/
cp /usr/lib64/libgomp.so.1 lib/
cp /usr/lib64/libjbig.so.2.0 lib/

mkdir tessdata
cd tessdata

wget https://github.com/tesseract-ocr/tessdata_fast/raw/master/eng.traineddata
wget https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata

mkdir configs
cp /usr/local/share/tessdata/configs/pdf configs/
Expand Down
9 changes: 5 additions & 4 deletions serverless.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
service: tesseract-aws-lambda

frameworkVersion: '3'

provider:
name: aws
runtime: python3.7
runtime: python3.9

package:
exclude:
Expand All @@ -19,19 +21,18 @@ package:
- test.jpg
- test_handler.py
- use_ocr_as_a_service.py
- layer/**

layers:
OCR:
path: layer
name: ocr-layer
description: Layer with Tesseract
compatibleRuntimes:
- python3.7
- python3.9
retain: false
package:
include:
- layer/**
- tesseract

functions:
ocr:
Expand Down