- Pull the Docker images
docker pull registry.cn-beijing.aliyuncs.com/hilab_space/chemdt:v1
docker pull registry.cn-beijing.aliyuncs.com/hilab_space/pdf_process:v1
- Start the Docker services
docker run --rm -e is_debug=N -e dev_or_pro=dev -p 5533:5533 chemdt:v1
docker run --rm -p 3114:3080 pdf_process:v1
- Install the environment
conda env create -f environment.yaml
conda activate chemdt
- Run the main function
cd tools
python client.py
- Convert a PDF file to images.
client = ChemDTClient()
pages = client.pdf2img("path_of_pdf","save_dir")
- IUPAC detection.
client = ChemDTClient()
iupac_det_res = client.iupac_det(origin_img_path)["iupac_result"]
- OCR
client = ChemDTClient()
box, txt = client.iupac_ocr("path_of_image")
- IUPAC fixer
client = ChemDTClient()
client.iupac_fixer("iupac_text")
- Full pipeline
client = ChemDTClient()
pdf_path = '../test_data/p2.pdf'
save_dir = '../pdf_images'
client.run_pipline(pdf_path, save_dir = save_dir)
- Write the extracted data to the local database. You need to modify the configuration in
conf.yaml
python to_db.py
- Start the web server and wait a moment for the front end to load all your data.
streamlit run app.py --server.address 0.0.0.0 --server.port 5001
- View your app in your browser on port 5001