In [None]:
import streamlit as st
import google.generativeai as genai

from PIL import Image
import pandas as pd
import json

from IPython.display import display
from IPython.display import Markdown
import textwrap
def to_markdown(text):
  """Wrap `text` in a Markdown block quote for rich notebook display.

  Every '•' bullet character is rewritten as an indented '*' list marker,
  then every line (blank lines included) is prefixed with '> '.

  Args:
    text: The string to render.

  Returns:
    An IPython `Markdown` object holding the quoted text.
  """
  bulleted = text.replace('•', '  *')
  # keepends=True preserves the original line terminators exactly.
  quoted = ''.join('> ' + line for line in bulleted.splitlines(True))
  return Markdown(quoted)

# Read the Gemini API key from Streamlit's secrets store (raises if the
# "GOOGLE_API_KEY" entry is absent) and hand it to the google.generativeai
# client, so the key itself never appears in the notebook.
GOOGLE_API_KEY = st.secrets["GOOGLE_API_KEY"]
genai.configure(api_key=GOOGLE_API_KEY)

# Decoding settings handed to the model constructor below.
generation_config = dict(
  temperature=0.2,
  max_output_tokens=4096,
  top_k=40,
  top_p=0.95,
)

# Gemini model used for every receipt request in this notebook. All four
# safety categories are set to 'block_none'.
# NOTE(review): confirm that the 'gemini-pro-vision' model id is still served
# by the API version in use.
model = genai.GenerativeModel(
  model_name='gemini-pro-vision',
  generation_config=generation_config,
  safety_settings=dict(
    harassment='block_none',
    hate='block_none',
    sex='block_none',
    danger='block_none',
  ),
)

def img_json(s: pd.Series) -> "tuple[Image.Image, str]":
  """Turn one example row into an (image, JSON text) few-shot pair.

  Args:
    s: A row holding a 'file_name' entry (path of the receipt image) plus
      the receipt fields.

  Returns:
    A 2-tuple: the image opened from `file_name`, and the string "JSON:\n"
    followed by the remaining fields pretty-printed as JSON. Non-ASCII text
    (e.g. Korean) is kept as-is rather than escaped.

  Raises:
    KeyError: if the row has no 'file_name' entry.
  """
  # Missing values (NaN/None/NaT) become empty strings so the JSON shows an
  # empty field instead of NaN, which is not valid JSON.
  d = {k: "" if pd.isna(v) else v for k, v in s.to_dict().items()}
  f = d.pop('file_name')
  # default=str lets values json cannot encode natively (e.g. the
  # pd.Timestamp that to_series produces) be written as their string form
  # instead of raising TypeError.
  return Image.open(f), "JSON:\n" + json.dumps(d, ensure_ascii=False, indent=4, default=str)

def to_series(json_str, file_name):
  """Parse a JSON receipt description into a pandas Series.

  Args:
    json_str: Text of a JSON object, e.g. the model's reply. Surrounding
      whitespace and an optional Markdown code fence (with or without a
      'json' language tag) are tolerated.
    file_name: Path of the receipt image; stored under 'file_name'.

  Returns:
    A pd.Series with the JSON keys plus 'file_name'. The 'receipt_datetime'
    entry is converted to a pd.Timestamp.

  Raises:
    json.JSONDecodeError: if the text is not valid JSON.
    KeyError: if no 'receipt_datetime' (or misspelled 'receipt_datatime')
      key is present.
  """
  text = json_str.strip()
  if text.startswith('```'):
    # Drop the fence markers and, if present, the leading language tag.
    text = text.strip('`').strip()
    if text[:4].lower() == 'json':
      text = text[4:]
  d = json.loads(text)
  # Accept the common misspelling 'receipt_datatime' as an alias, keeping the
  # key's original position so the field order is unchanged.
  d = {('receipt_datetime' if k == 'receipt_datatime' else k): v for k, v in d.items()}
  d['receipt_datetime'] = pd.Timestamp(d['receipt_datetime'])
  d['file_name'] = file_name
  return pd.Series(d)

In [None]:
# Instruction text that opens the few-shot prompt. The date key is spelled
# 'receipt_datetime' because that is the key to_series() reads from the reply.
parts = ["""Transform the following recipt (or slip) in a JSON format in Korean(한국어).
Keys are ['receipt_datetime', 'business_name(상호명,가맹점명)', 'business_no(사업자번호)', 'address', 'tel', 'fax', 'e-mail', 'item_summary', 'currency unit', 'total'].
Do NOT translate the keys and addresses.
'business_no' is place near the 'business_name' and it is of the form "xxx-xx-xxxxx".
Categories are ['식사', '음료', '식료품', '주류', '사무용품', '의약품', ...].
"""]

# Load the example receipts; the first CSV column is used as the row index.
example_df = pd.read_csv('./example/receipt.csv', index_col=0)
# Few-shot examples: img_json returns an (image, JSON text) pair for each row,
# and both items are appended to the prompt in that order.
for i, s in example_df.iterrows():
  parts.extend(img_json(s))

In [None]:
# Show the example table that was loaded from ./example/receipt.csv.
example_df

In [None]:
# Query the model with the new receipt. The image must be the same file that
# the following cells record as 'file_name' (./example/imgs/013.jpg);
# otherwise the parsed row would be stored under the wrong image.
prompt = [Image.open('./example/imgs/013.jpg'), "JSON:\n"]
response = model.generate_content(parts + prompt)
# Render the raw reply as a fenced JSON block inside a Markdown quote.
to_markdown('```json\n'+response.text+'\n```')

In [None]:
# Sanity check: the reply parses as JSON and the image path is attached.
to_series(response.text, './example/imgs/013.jpg')['file_name']

In [None]:
# Append the parsed receipt as one extra row and renumber the rows 0..n-1.
new_df = pd.concat(
  [
    example_df,
    pd.DataFrame([to_series(response.text, './example/imgs/013.jpg')]),
  ],
  ignore_index=True,
)

In [None]:
# Show the example table with the newly parsed receipt appended.
new_df

In [None]:
# Manual corrections: overwrite the business name and registration number of
# the row labelled 13 with hand-checked values.
# NOTE(review): presumably 13 is the row appended by the concat above (i.e.
# example_df held 13 rows) — confirm before re-running with a different CSV.
new_df.loc[13, 'business_name'] = '보비스쿰 카페테리아'
new_df.loc[13, 'business_no'] = '303-82-11227'

# Display the corrected table.
new_df

In [None]:
# Persist the extended table to the same CSV the examples are read from, so
# the new receipt is loaded as a few-shot example on the next run.
# NOTE: this overwrites the input file; re-running the whole notebook appends
# one more row each time.
new_df.to_csv('./example/receipt.csv')

In [None]:
# Inspect the column dtypes of the example table as loaded from the CSV.
example_df.dtypes