# 
# JSON structured data from tables in scientific papers

## Jsonformer non-token approach for OpenAI
#### (based on: https://github.com/1rgs/jsonformer, https://github.com/martinezpl/jsonformer/tree/add-openai)

In [2]:
import os
import pandas as pd
import json
import time

import openai
from IPython.display import IFrame
from IPython.display import HTML

from LLMsTablesToJson.html_table import get_driver, extract_tableSource, extract_table, quit_driver
from LLMsTablesToJson.jsonformer_non_tokens import JsonformerNoTokens, OpenAIModel, highlight_values

# Directory holding local resources for this notebook (here: the chromedriver binary).
DATA_DIR = "your_path"
chromedriver_path = os.path.join(DATA_DIR, "chromedriver.exe")

# Read the key from the OPENAI_API_KEY environment variable so a real credential
# never has to be saved inside the notebook; the original placeholder is kept as
# the fallback so the cell still runs when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your_openai_api_key")

## Example: Supercapacitor performance of 2D nanocomposite materials

### Get table HTML code from DOI

In [85]:
# DOI of the article; used below to build the preview URL and to open the page for scraping.
doi = "10.1016/j.est.2023.107335"

In [86]:
# Embed the dx.doi.org page for this DOI so the article can be checked visually.
IFrame(src=f"https://dx.doi.org/{doi}", width=900, height=380)

In [87]:
# Open the article identified by `doi` using the local chromedriver binary
# (presumably a Selenium browser session — the output below lists Selenium WebElements).
driver = get_driver(doi, chromedriver_path)
# Fixed 2-second pause before querying the page — presumably to let it finish loading; TODO confirm.
# NOTE(review): an explicit wait on the table elements would be more reliable than a fixed sleep.
time.sleep(2)
# Look up the tables on the page; per the cell output this call also prints the
# matched elements and the number of tables found.
tableSource = extract_tableSource(driver)

[<selenium.webdriver.remote.webelement.WebElement (session="eb112d109350f0ca4499e0f2bfe5e2a4", element="CB9685DC4D3C23054E076345708BDD08_element_96")>, <selenium.webdriver.remote.webelement.WebElement (session="eb112d109350f0ca4499e0f2bfe5e2a4", element="CB9685DC4D3C23054E076345708BDD08_element_97")>, <selenium.webdriver.remote.webelement.WebElement (session="eb112d109350f0ca4499e0f2bfe5e2a4", element="CB9685DC4D3C23054E076345708BDD08_element_98")>]
number of tables = 3


In [88]:
# Which of the tables found on the page to extract.
# NOTE(review): whether extract_table treats this as 0- or 1-based is not visible here — confirm.
table_num = 2
try:
    html_table_supercapacitor = extract_table(tableSource, table_num)
finally:
    # Always shut the browser down, even when extraction raises, so no driver
    # process is left running in the background.
    quit_driver(driver)

In [89]:
# Inspect the raw HTML string extracted for the selected table.
html_table_supercapacitor

'<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><span class="anchor-text">[86]</span></a></td></tr><tr><td class="align-char">3</td><td>EDA- Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">486.2</td><td>1&nbsp;M H

In [90]:
# Keep a copy of the scraped HTML on disk. The encoding is pinned to UTF-8 because
# open() otherwise uses the platform's locale encoding, which can raise
# UnicodeEncodeError on non-ASCII characters in scraped text (e.g. cp1252 on Windows).
with open("html_table_supercapacitor.txt", "w", encoding="utf-8") as file:
    file.write(html_table_supercapacitor)

### Show table

In [92]:
# Render the scraped HTML as a table in the notebook output.
HTML(html_table_supercapacitor)

Sl no,Electrodes,Specific capacitance (F/g),Electrolyte,Current density (A/g),Ref.
1,FG,276.0,1 M H2SO4,0.1,[85]
2,Ti3C2Tx,140.0,1 M KOH,5 mV/s,[86]
3,EDA- Ti3C2Tx,486.2,1 M H2SO4,2 mV/s,[88]
4,MoS2 NW,122.0,1 M Na2SO4,0.5,[89]
5,MoS2 NS,138.0,1 M Na2SO4,1,[90]
6,MoS2 NW,142.0,1 M KCL,0.59,[91]
7,NiCo-LDHs,1187.2,6 M KOH,1,[92]
8,CoSx/Ni-Co LDH,1562.0,6 M KOH,1,[93]


### Convert HTML table to JSON format with OpenAI LLMs

In [93]:
# Fields requested for every table row, in the order they should be generated.
_row_fields = {
    "Electrode": {"type": "string"},
    "Specific capacitance (F/g)": {"type": "number"},
    "Electrolyte": {"type": "string"},
    # Declared as a string: some rows of the table hold values such as "5 mV/s"
    # in this column, which would not fit a numeric type.
    "Current density (A/g)": {"type": "string"},
}

# Target schema: a single object whose "Nanocomposites" array holds one entry per table row.
json_schema_supercapacitor = {
    "type": "object",
    "properties": {
        "Nanocomposites": {
            "type": "array",
            "items": {"type": "object", "properties": _row_fields},
        },
    },
}

In [94]:
# Persist the schema to disk, then read it back so the following cells work with
# exactly what is stored in the JSON file.
with open('json_schema_supercapacitor.json', 'w', encoding="utf-8") as file:
    json.dump(json_schema_supercapacitor, file)
with open("json_schema_supercapacitor.json", encoding="utf-8") as file:
    # No explicit close() needed: the with-statement closes the file on exit.
    json_schema_supercapacitor = json.load(file)

In [95]:
# Display the schema as it was reloaded from the JSON file.
json_schema_supercapacitor

{'type': 'object',
 'properties': {'Nanocomposites': {'type': 'array',
   'items': {'type': 'object',
    'properties': {'Electrode': {'type': 'string'},
     'Specific capacitance (F/g)': {'type': 'number'},
     'Electrolyte': {'type': 'string'},
     'Current density (A/g)': {'type': 'string'}}}}}}

In [96]:
# Model wrapper handed to the builder (its own debug output is switched off).
# NOTE(review): "text-davinci-003" has since been retired by OpenAI — confirm which
# replacement model the OpenAIModel wrapper supports before re-running.
completion_model = OpenAIModel("text-davinci-003", debug=False)

builder = JsonformerNoTokens(
    model=completion_model,
    json_schema=json_schema_supercapacitor,
    text=html_table_supercapacitor,
    # Prompt wording is reproduced verbatim; per the debug output it is followed by the table HTML.
    prompt="Generate a object with the following schema extracting the information from the provided table in html code:",
    temperature=0.1,
    # debug=True prints every prompt/response pair, which makes the cell output very long.
    debug=True,
    # 8 matches the number of data rows in the table rendered earlier in this notebook.
    max_array_length=8,
    max_string_token_length=10,
)

print("Generating...")
# Calling the builder runs the generation and returns the filled-in object.
result_supercapacitor = builder()

Generating...
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><span class="anchor-text">[86]</span><

[generate_string] response  "1 M H2SO4", "Current
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><s

[generate_number] response  140, "Electroly
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><span cl

[generate_string] response  "EDA- Ti3C2Tx",
[generate_number] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><span cl

[generate_string] response  "2 mV/s"}, {"Electro
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><sp

[generate_string] response  "1 M Na2SO4", "Current
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><

[generate_number] response  138, "Electroly
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><span cl

[generate_string] response  "MoS2 NW", "Specific capacitance
[generate_number] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="

[generate_string] response  "0.59"}, {"Electrode":
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><

[generate_string] response  "6 M KOH", "Current density (
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb

[generate_string] response  "CoSx/Ni-Co LDH
[generate_number] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb0430"><span cl

[generate_string] response  "6 M KOH", "Current density (
[generate_string] prompt Generate a object with the following schema extracting the information from the provided table in html code:
<table><thead><tr class="rowsep-1 valign-top"><th scope="col">Sl no</th><th scope="col">Electrodes</th><th scope="col">Specific capacitance (F/g)</th><th scope="col">Electrolyte</th><th scope="col">Current density (A/g)</th><th scope="col">Ref.</th></tr></thead><tbody><tr><td class="align-char">1</td><td>FG</td><td class="align-char">276</td><td>1&nbsp;M H<sub>2</sub>SO<sub>4</sub></td><td>0.1</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0425" name="bbb0425"><span class="anchor-text">[85]</span></a></td></tr><tr><td class="align-char">2</td><td>Ti<sub>3</sub>C<sub>2</sub>T<sub>x</sub></td><td class="align-char">140</td><td>1&nbsp;M KOH</td><td>5&nbsp;mV/s</td><td><a class="anchor workspace-trigger u-display-inline anchor-paragraph" href="#bb0430" name="bbb

In [97]:
# Print the generated object with the extracted values highlighted (shown in color in the output).
highlight_values(result_supercapacitor)

{
  Nanocomposites: [
    {
      Electrode: [94m"FG"[0m,
      Specific capacitance (F/g): [94m276[0m,
      Electrolyte: [94m"1 M H2SO4"[0m,
      Current density (A/g): [94m"0.1"[0m
    },
    {
      Electrode: [94m"Ti3C2Tx"[0m,
      Specific capacitance (F/g): [94m140[0m,
      Electrolyte: [94m"1 M KOH"[0m,
      Current density (A/g): [94m"5 mV/s"[0m
    },
    {
      Electrode: [94m"EDA- Ti3C2Tx"[0m,
      Specific capacitance (F/g): [94m486.2[0m,
      Electrolyte: [94m"1 M H2SO4"[0m,
      Current density (A/g): [94m"2 mV/s"[0m
    },
    {
      Electrode: [94m"MoS2 NW"[0m,
      Specific capacitance (F/g): [94m122[0m,
      Electrolyte: [94m"1 M Na2SO4"[0m,
      Current density (A/g): [94m"0.5"[0m
    },
    {
      Electrode: [94m"MoS2 NS"[0m,
      Specific capacitance (F/g): [94m138[0m,
      Electrolyte: [94m"1 M Na2SO4"[0m,
      Current density (A/g): [94m"1"[0m
    },
    {
      Electrode: [94m"MoS2 NW"[0m,
      Specific

In [98]:
# Serialize the generated object first, then store the JSON text on disk.
serialized_result = json.dumps(result_supercapacitor)
with open('json_table_supercapacitor.json', 'w') as out_file:
    out_file.write(serialized_result)