Note: The fine-tuning code differs slightly from earlier versions because the `openai` package has been updated to v1.0.0+.

Here, we provide the latest code.

- Before 2023.11.06, we set ```model = "gpt-3.5-turbo-0613"```
- After 2023.11.06, we can set ```model = "gpt-3.5-turbo-1106"``` or ```model = "gpt-3.5-turbo-0125"``` (recommended)

## Environment (OS: Windows or Linux)

In [125]:
# Install the third-party packages that the notebook imports below
# (IPython shell escapes: each line runs pip in the kernel's environment).
!pip install pandas
!pip install openai
!pip install textdistance




[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.1.2 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## Import Packages

In [11]:
import pandas as pd
import os
import csv
from openai import OpenAI
from typing import List
import textdistance

## Load Data

## 1. Processing training data into .jsonl

In [6]:
import json
import random
import pandas as pd
from tqdm import tqdm

# System prompt placed at the start of every training conversation
# (see prepare_example_conversation).
system_message = """Extract the NMR information from the Paragraph."""

# Alternative, far more detailed system prompt, kept disabled; uncomment it to
# override the short prompt above.
# system_message = """Extract text containing 1H NMR and 13C NMR data, remove interference information such as reactants, raw materials, solvents and other non-final product names based on text semantics, and then extract the name, code or number of the final product. Please delete the IUPAC name Alias, numbers and ordinal numbers before and after fields, such as '2.1.3.', '(HL4)', '(9)', '(4d)'. NMR text should contain complete information, such as instrument power and solvent information, For example, "13C NMR text": "13C NMR (400 MHz, acetone-d6) 174.0 (C), 157.7 (C). Then split the NMR text. The content in NMR conditions is NMR instrument power and solvent information, such as "13C NMR conditions": "400MHz, acetone-d6". The contents in the NMR data are only numbers, such as "13C NMR data": "174.0, 157.7", "1H NMR data": "174.0, 157.7". All responses must originate from information extracted from the given text, ensuring that the extracted content has not been modified or fragmented, and that capitalization and punctuation are exactly the same as the given text. Must end with {"IUPAC":"text","1H NMR text":"text","1H NMR conditions":"text","1H NMR data":"text","13C NMR text":"text","13C NMR conditions":"text","13C NMR data":"text"} format reply."""

def create_user_message(row):
    """Return the user-turn text for one example.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with a
            ``'Paragraph'`` entry.

    Returns:
        The ``'Paragraph'`` value rendered as a string.
    """
    paragraph = row['Paragraph']
    return "{}".format(paragraph)

def create_assistant_message(row):
    """Return the expected assistant reply for one example as compact JSON.

    The reply is a single JSON object holding the seven annotated fields in
    a fixed order. Each value is converted to ``str`` (so a numeric cell such
    as ``7.26`` is emitted as ``"7.26"``) and serialised with ``json.dumps``,
    which escapes quotes, backslashes and control characters. Building the
    text by hand would produce invalid JSON whenever a field contains one of
    those characters.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) containing the keys
            listed in ``fields`` below.

    Returns:
        A JSON string without whitespace between tokens; non-ASCII
        characters are kept as-is rather than ``\\uXXXX``-escaped.

    Raises:
        KeyError: If ``row`` lacks one of the expected fields.
    """
    fields = (
        "IUPAC",
        "1H NMR text",
        "1H NMR conditions",
        "1H NMR data",
        "13C NMR text",
        "13C NMR conditions",
        "13C NMR data",
    )
    payload = {name: str(row[name]) for name in fields}
    # Compact separators and ensure_ascii=False keep the output identical to
    # the previous hand-written format for values that need no escaping.
    return json.dumps(payload, ensure_ascii=False, separators=(",", ":"))

def prepare_example_conversation(row):
    """Build one chat-format fine-tuning example from a data row.

    Args:
        row: Record passed on to ``create_user_message`` and
            ``create_assistant_message``.

    Returns:
        A dict ``{"messages": [...]}`` with three turns in order: the
        system prompt, the user paragraph and the expected assistant reply.
    """
    return {
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": create_user_message(row)},
            {"role": "assistant", "content": create_assistant_message(row)},
        ]
    }

def write_jsonl(data_list: list, filename: str) -> None:
    """Write each item of ``data_list`` as one JSON line to ``filename``.

    Args:
        data_list: JSON-serialisable objects, one per output line.
        filename: Destination path; an existing file is overwritten.
    """
    with open(filename, "w") as sink:
        sink.writelines(json.dumps(record) + "\n" for record in data_list)
            
# Input/output locations used by the notebook.
train_input_file_path = "train_25.csv"
train_output_file_path = "train_25.jsonl"
test_input_file_path = "test_300.csv"
# NOTE(review): not referenced in the visible cells; presumably where the
# predictions are saved later -- confirm.
prediction_result_path = "prediction_result_without_prompt.csv"

# Turn every CSV row into one chat-format example (one dict per row).
train_df = pd.read_csv(train_input_file_path)
training_data = train_df.apply(prepare_example_conversation, axis=1).tolist()

# Persist the examples as JSON Lines, the file that gets uploaded next.
write_jsonl(training_data, train_output_file_path)

## 2. Upload the training file

In [130]:
# Upload the training file. After uploading, it takes a few minutes for the
# data to be processed.

# Prefer the OPENAI_API_KEY environment variable so the secret is not stored in
# the notebook; the placeholder is only used when the variable is unset.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "sk-OPENAI_APIKEY")   # or replace the placeholder with your openai-api-key
)

# Upload the file written by the previous step (train_output_file_path) rather
# than a hard-coded name, and use a context manager so the file handle is
# closed once the upload call returns.
with open(train_output_file_path, "rb") as training_file:
    training_response = client.files.create(
        file=training_file,
        purpose="fine-tune",
    )

print("Training file:", training_response.model_dump_json(indent=2))
# The file id is what the fine-tuning job refers to.
file_id = training_response.id

Training file: {
  "id": "file-Jzz6hmBZuqLGqrj84tI5Tng5",
  "bytes": 79488,
  "created_at": 1702039513,
  "filename": "train_25_data_in_50.jsonl",
  "object": "file",
  "purpose": "fine-tune",
  "status": "processed",
  "status_details": null
}


## 3. Create a fine-tuning job

- Before 2023.11.06, we set ```model = "gpt-3.5-turbo-0613"```

- After 2023.11.06, we recommend setting ```model = "gpt-3.5-turbo-1106"``` or ```model = "gpt-3.5-turbo-0125"```

In [131]:
# Start a fine-tuning job on the uploaded training file.
base_model = "gpt-3.5-turbo-1106"
job_hyperparameters = {"n_epochs": 3}

fine_tuning_response = client.fine_tuning.jobs.create(
    training_file=file_id,
    model=base_model,
    hyperparameters=job_hyperparameters,
)

print('fine_tuning_response:', fine_tuning_response.model_dump_json(indent=2))
# Keep the job id so the job can be queried in the next step.
fine_tuning_job_id = fine_tuning_response.id

fine_tuning_response: {
  "id": "ftjob-krYOi8AUuEO48THQSb7nzy5w",
  "created_at": 1702039515,
  "error": null,
  "fine_tuned_model": null,
  "finished_at": null,
  "hyperparameters": {
    "n_epochs": 3,
    "batch_size": "auto",
    "learning_rate_multiplier": "auto"
  },
  "model": "gpt-3.5-turbo-1106",
  "object": "fine_tuning.job",
  "organization_id": "org-4bdp8nLOXTwA9DZ6Apy7enJy",
  "result_files": [],
  "status": "validating_files",
  "trained_tokens": null,
  "training_file": "file-Jzz6hmBZuqLGqrj84tI5Tng5",
  "validation_file": null
}


## 4. Query the status of the fine-tuning job

In [146]:
# Re-run this cell to check whether the job status is "running" or "succeeded".

retrieve_job_response = client.fine_tuning.jobs.retrieve(fine_tuning_job_id)
print(retrieve_job_response.model_dump_json(indent=2))
print('status:', retrieve_job_response.status)
fine_tuned_model = retrieve_job_response.fine_tuned_model       # get the fine-tuned model

# The job reports no fine-tuned model name until it has finished, so make that
# visible here instead of passing None as the model in the inference cells.
if fine_tuned_model is None:
    print('fine_tuned_model is not available yet; re-run this cell once the status is "succeeded".')

{
  "id": "ftjob-krYOi8AUuEO48THQSb7nzy5w",
  "created_at": 1702039515,
  "error": null,
  "fine_tuned_model": "ft:gpt-3.5-turbo-1106:personal::8TUbKUIt",
  "finished_at": 1702039833,
  "hyperparameters": {
    "n_epochs": 3,
    "batch_size": 1,
    "learning_rate_multiplier": 2
  },
  "model": "gpt-3.5-turbo-1106",
  "object": "fine_tuning.job",
  "organization_id": "org-4bdp8nLOXTwA9DZ6Apy7enJy",
  "result_files": [
    "file-1gpirigDitsLdr4MtjlQy3UI"
  ],
  "status": "succeeded",
  "trained_tokens": 97809,
  "training_file": "file-Jzz6hmBZuqLGqrj84tI5Tng5",
  "validation_file": null
}
status: succeeded


## 5. Use the fine-tuned model on a single input

In [147]:
# System prompt sent with each inference request below.
system_message = """Extract the NMR information from the Paragraph."""

# Alternative, far more detailed system prompt, kept disabled; uncomment it to
# override the short prompt above.
# system_message = """Extract text containing 1H NMR and 13C NMR data, remove interference information such as reactants, raw materials, solvents and other non-final product names based on text semantics, and then extract the name, code or number of the final product. Please delete the IUPAC name Alias, numbers and ordinal numbers before and after fields, such as '2.1.3.', '(HL4)', '(9)', '(4d)'. NMR text should contain complete information, such as instrument power and solvent information, For example, "13C NMR text": "13C NMR (400 MHz, acetone-d6) 174.0 (C), 157.7 (C). Then split the NMR text. The content in NMR conditions is NMR instrument power and solvent information, such as "13C NMR conditions": "400MHz, acetone-d6". The contents in the NMR data are only numbers, such as "13C NMR data": "174.0, 157.7", "1H NMR data": "174.0, 157.7". All responses must originate from information extracted from the given text, ensuring that the extracted content has not been modified or fragmented, and that capitalization and punctuation are exactly the same as the given text. Must end with {"IUPAC":"text","1H NMR text":"text","1H NMR conditions":"text","1H NMR data":"text","13C NMR text":"text","13C NMR conditions":"text","13C NMR data":"text"} format reply."""

# Example paragraph used to try out the fine-tuned model.
user_message = "3.6.5. 2,3-Di(thio-4-methylphenyl)quinoxaline (2e)Colorless crystals, 1.59 g (85%) yield; m.p. 150 °C. IR (KBr), 3049 (sp2 = C-H), 2968 (sp3-C-H), 1596 (C=N), and 751, 592 (C-S, asymmetric and symmetric stretching) cm−1. 1H-NMR (500 MHz, CDCl3, 300 K), 7.65 (dd, 4H, J = 6.3, 3.5 Hz, H8), 7.53 (d, 4H, J = 8.1 Hz, H2′, and H6′), 7.44 (dd, 2H, J = 6.4, 3.4 Hz, H7), 7.27 (d, 4H, J = 8.0 Hz, H3’and H5’), and 2.42 (s, 6H, 2CH3) ppm. 13C-NMR (APT) (125 MHz, CDCl3, 300 K), δ 154.01 (C), 140.32 (C), 139.26 (C), 135.13 (CH), 129.96 (CH), 128.19 (CH), 127.83 (CH), 124.95 (C), 21.28 (CH3) ppm. C22H18N2S2 requires: C, 70.5%; H, 4.8%; N, 7.4%. Found: C, 70.78%; H, 4.73%; N, 7.68%."

# Chat prompt: the system instruction followed by the paragraph to analyse.
test_messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": user_message},
]

print("Paragraphs: ", user_message)

# temperature 0 is used for the extraction request.
response = client.chat.completions.create(
    model=fine_tuned_model,
    messages=test_messages,
    temperature=0,
)

print("response: ", response.choices[0].message.content)

Paragraphs:  3.6.5. 2,3-Di(thio-4-methylphenyl)quinoxaline (2e)Colorless crystals, 1.59 g (85%) yield; m.p. 150 °C. IR (KBr), 3049 (sp2 = C-H), 2968 (sp3-C-H), 1596 (C=N), and 751, 592 (C-S, asymmetric and symmetric stretching) cm−1. 1H-NMR (500 MHz, CDCl3, 300 K), 7.65 (dd, 4H, J = 6.3, 3.5 Hz, H8), 7.53 (d, 4H, J = 8.1 Hz, H2′, and H6′), 7.44 (dd, 2H, J = 6.4, 3.4 Hz, H7), 7.27 (d, 4H, J = 8.0 Hz, H3’and H5’), and 2.42 (s, 6H, 2CH3) ppm. 13C-NMR (APT) (125 MHz, CDCl3, 300 K), δ 154.01 (C), 140.32 (C), 139.26 (C), 135.13 (CH), 129.96 (CH), 128.19 (CH), 127.83 (CH), 124.95 (C), 21.28 (CH3) ppm. C22H18N2S2 requires: C, 70.5%; H, 4.8%; N, 7.4%. Found: C, 70.78%; H, 4.73%; N, 7.68%.
response:  {"IUPAC":"2,3-Di(thio-4-methylphenyl)quinoxaline","1H NMR text":"1H-NMR (500 MHz, CDCl3, 300 K), 7.65 (dd, 4H, J = 6.3, 3.5 Hz, H8), 7.53 (d, 4H, J = 8.1 Hz, H2′, and H6′), 7.44 (dd, 2H, J = 6.4, 3.4 Hz, H7), 7.27 (d, 4H, J = 8.0 Hz, H3’and H5’), and 2.42 (s, 6H, 2CH3) ppm","1H NMR conditions":"500 

## 6. Inference on multiple inputs

In [149]:
# Run inference on the first 25 of the 300 test paragraphs.
test_df = pd.read_csv(test_input_file_path)
test_paragraphs = list(test_df['Paragraph'][:25])
test_preds = []

total_tokens = 0
for user_message in tqdm(test_paragraphs):
    syst_message = system_message
    # Same two-turn prompt as in the single-input example.
    test_messages = [
        {"role": "system", "content": syst_message},
        {"role": "user", "content": user_message},
    ]

    print("\nParagraphs: ", user_message)

    response = client.chat.completions.create(
        model=fine_tuned_model,   # model_name_id
        messages=test_messages,   # input
        temperature=0,
    )

    # Collect the raw reply text and keep a running total of token usage.
    output = response.choices[0].message.content
    print("response: ", output)
    test_preds.append(output)
    tokens_number = response.usage.total_tokens
    total_tokens += tokens_number
# print("used total_tokens: ", total_tokens)

  0%|          | 0/25 [00:00<?, ?it/s]


Paragraphs:  Synthesis and crystallization A stirred solution of 3,5-bis­(tri­fluoro­meth­yl) aceto­phenone (0.5 g, 1.95 mmol) in acetic acid (5 mL) was added dropwise to bromine (0.312 g, 1.95 mmol) in acetic acid. The reaction medium was stirred at room temperature for 5 h. To the resulting mixture, water (5 mL) was added and the mixture was concentrated under reduced pressure. The residue obtained was diluted with ethyl­acetate (10 mL), the organic layer washed with water (10 mL) and a sodium bicarbonate solution (5 mL), and filtered through dried sodium sulfate and evaporated to obtain 1-(3,5-bis­(tri­fluoro­meth­yl)phen­yl)-2-bromo­ethanone as a light-yellow solid in 62% yield. m.p: 317–318 K. 1H NMR: (CDCl3, 600 MHz): 8.44 (2H, s), 8.13 (1H, s), 4.48 (2H, s); 13C NMR: (CDCl3, 150 MHz): 188.81, 135.31, 133.06, 132.83, 132.60, 128.99, 127.08, 127.06, 125.42, 123.61, 121.80, 120.00, 29.46.  Refinement Crystal data, data collection and structure refinement details are summarized in 

  4%|▍         | 1/25 [00:05<02:09,  5.41s/it]

response:  {"IUPAC":"1-(3,5-bis­(tri­fluoro­meth­yl)phen­yl)-2-bromo­ethanone","1H NMR text":"1H NMR: (CDCl3, 600 MHz): 8.44 (2H, s), 8.13 (1H, s), 4.48 (2H, s)","1H NMR conditions":"CDCl3, 600 MHz","1H NMR data":"8.44, 8.13, 4.48","13C NMR text":"13C NMR: (CDCl3, 150 MHz): 188.81, 135.31, 133.06, 132.83, 132.60, 128.99, 127.08, 127.06, 125.42, 123.61, 121.80, 120.00, 29.46","13C NMR conditions":"CDCl3, 150 MHz","13C NMR data":"188.81, 135.31, 133.06, 132.83, 132.60, 128.99, 127.08, 127.06, 125.42, 123.61, 121.80, 120.00, 29.46"}

Paragraphs:  Synthesis of N-(diaminomethylidene)-4-[(E)-(4-hydroxyphenyl)diazenyl] benzenesulfonamide (SPh) A solution of sodium nitrite (0.17 g, 7.3 mmol) in water (2 mL) was slowly added to an acidified solution of sulfaguanidine (0.64 g, 3 mmol) in water/acetone mixture (12 mL, 2:1) at 0°C. The resulting diazonium salt solution was gently added to an ice-cold solution of phenol (0.29 g, 3 mmol), NaOH (0.34 g, 8.5 mmol) and Na2CO3 (0.9 g, 8.5 mmol) in 10 mL

  8%|▊         | 2/25 [00:08<01:35,  4.16s/it]

response:  {"IUPAC":"N-(diaminomethylidene)-4-[(E)-(4-hydroxyphenyl)diazenyl] benzenesulfonamide","1H NMR text":"1H-NMR (500 MHz, DMSO-d6) δ (ppm): 6.77 (4H, bs, C-NH2), 6.9 (2H, d, Ar-H, J = 8.0 Hz),7.8 (2H, d, Ar-H, J = 10.0 Hz), 7.9 (4H, d, Ar-H, J = 8 Hz)","1H NMR conditions":"500 MHz, DMSO-d6","1H NMR data":"6.77, 6.9, 7.8, 7.9","13C NMR text":"13C-NMR (125 MHz, DMSO-d6) δ: 161.6, 158.1, 153.2, 145.1, 126.8, 125.2, 116.0","13C NMR conditions":"125 MHz, DMSO-d6","13C NMR data":"161.6, 158.1, 153.2, 145.1, 126.8, 125.2, 116.0"}

Paragraphs:  1-(3,4-Dimethylphenyl)-5-phenyl-1H-pyrazole-3,4-dicarboxylic acid (2) Compound 1 (0.364 g, 1 mmol) was heated in solution of sodium hydroxide (0.1 g 2.5 mmol) in 20 mL water for 1 h. The solution was cooled down to room temperature. It was added with concentrated hydrochloric acid (1.5 mL) and water (1.5 mL). The white solid product was occurred. It was filtered and it was washed with water. Yield 90%. Color white. mp 126–128 °C. FT-IR (ν, cm−1)

 12%|█▏        | 3/25 [00:13<01:36,  4.38s/it]

response:  {"IUPAC":"1-(3,4-Dimethylphenyl)-5-phenyl-1H-pyrazole-3,4-dicarboxylic acid","1H NMR text":"1H NMR (400 MHz, CDCl3) δ (ppm) 11.6 (br.s, 2H, -OH), 8.1 (m, 1H), 7.9 (m, 2H), 7.6 (m, 2H), 7.1 (m, 3H), 2.1 (s, 3H, Ar-CH3), 1.8 (s, 3H, Ar-CH3)","1H NMR conditions":"400 MHz, CDCl3","1H NMR data":"11.6, 8.1, 7.9, 7.6, 7.1, 2.1, 1.8","13C NMR text":"13C NMR (100 MHz, CDCl3) δ (ppm) 171.3, 167.9 (C=O, acid), 145.2 (C3), 142.1 (C5), 135.0, 134.0, 132.9, 132.8, 132.0, 128.0, 127.2, 125.0, 115.8, 115.7 (C4), 29.4 (Ar-CH3), 20.7 (Ar-CH3)","13C NMR conditions":"100 MHz, CDCl3","13C NMR data":"171.3, 167.9, 145.2, 142.1, 135.0, 134.0, 132.9, 132.8, 132.0, 128.0, 127.2, 125.0, 115.8, 115.7, 29.4, 20.7"}

Paragraphs:  Synthesis of [(M)-d-4]-C12-TEG Under an argon atmosphere, a mixture of 4 (45.0 mg, 0.0451 mmol), tris(dibenzylideneacetone)dipalladium(0) chloroform adduct (0.82 mg, 0.753 μmol), cuprous iodide (1.72 mg, 9.03 μmol), tris(2,4,6-trimethylphenyl)phosphine (1.75 mg, 4.52 μmol), tri

 16%|█▌        | 4/25 [00:27<02:54,  8.32s/it]

response:  {"IUPAC":"[(M)-d-4]-C12-TEG","1H NMR text":"1H NMR (400 MHz, CDCl3): δ 0.86 (9H, t, J = 6.8 Hz), 1.25–1.52 (74H, m), 1.72–1.89 (14H, m), 1.97 (12H, s), 2.00 (12H, s), 3.37 (18H, s), 3.52–3.55 (12H, m), 3.62–3.68 (24H, m), 3.70–3.75 (12H, m), 3.80 (4H, t, J = 5.2 Hz), 3.87 (8H, t, J = 5.0 Hz), 4.01 (4H, t, J = 6.6 Hz), 4.18–4.23 (12H, m), 4.28 (4H, t, J = 6.8 Hz), 4.41–4.45 (6H, m), 6.94 (4H, dt, J = 8.8, 1.8 Hz), 7.29 (4H, s), 7.46–7.52 (8H, m), 7.64 (4H, dt, J = 8.8, 1.8 Hz), 7.66–7.77 (8H, m), 8.06 (2H, s), 8.12 (2H, s), 8.16 (4H, s), 8.21 (2H, t, J = 1.6 Hz) 8.22 (1H, t, J = 1.6 Hz), 8.36–8.38 (6H, m), 8.52–8.58 (8H, m)","1H NMR conditions":"400 MHz, CDCl3","1H NMR data":"0.86, 1.25–1.52, 1.72–1.89, 1.97, 2.00, 3.37, 3.52–3.55, 3.62–3.68, 3.70–3.75, 3.80, 3.87, 4.01, 4.18–4.23, 4.28, 4.41–4.45, 6.94, 7.29, 7.46–7.52, 7.64, 7.66–7.77, 8.06, 8.12, 8.16, 8.21 8.22, 8.36–8.38, 8.52–8.58","13C NMR text":"13C NMR (100 MHz, CDCl3): δ 14.1, 22.6, 23.2, 25.96, 26.02, 28.7, 29.2, 2

 20%|██        | 5/25 [00:34<02:32,  7.63s/it]

response:  {"IUPAC":"3-Amino-5-(1-methyl-1H-pyrazol-4-yl)pyridin-2(1H)-one","1H NMR text":"1H NMR (500 MHz, DMSO‑d6) 3.81 (3H, s), 5.09 (2H, s), 6.63 (1H, d, J = 2.3 Hz), 6.83 (1H, d, J = 2.3 Hz), 7.59 (1H, s), 7.83 (1H, s), 11.38 (1H, s)","1H NMR conditions":"500 MHz, DMSO‑d6","1H NMR data":"3.81, 5.09, 6.63, 6.83, 7.59, 7.83, 11.38","13C NMR text":"13C NMR (126 MHz, DMSO‑d6) 157.3 (C), 139.2 (C), 135.6 (CH), 127.0 (CH), 119.7 (C), 115.8 (CH), 112.3 (C), 110.1 (CH), 39.0 (CH3)","13C NMR conditions":"126 MHz, DMSO‑d6","13C NMR data":"157.3, 139.2, 135.6, 127.0, 119.7, 115.8, 112.3, 110.1, 39.0"}

Paragraphs:  2.4. Preparation of Ionic Liquids With diethylene glycol-bridged functionalized imidazolium dicationic ionic liquids as an example, synthesis route of polyethylene glycol-bridged functionalized imidazolium dicationic ionic liquids was shown in four steps as follows: Firstly, thionyl chloride was added to diethylene glycol in anhydrous environment and then heated to 250°C until no 

 24%|██▍       | 6/25 [00:40<02:17,  7.24s/it]

response:  {"IUPAC":"diethylene glycol-bridged functionalized imidazolium dicationic ionic liquids","1H NMR text":"1H NMR (600\u2009MHz, CD3COCD3, δ, ppm): 3.97 (s, 6H, N–CH3), 4.05 (4H, s, N–CH2), 4.32 (4H, s, O–CH2), 7.68 (4H, C (4,5)–H), 9.03 (2H, C(2)–H)","1H NMR conditions":"600\u2009MHz, CD3COCD3, δ, ppm","1H NMR data":"3.97, 4.05, 4.32, 7.68, 9.03","13C NMR text":"13C NMR (150\u2009MHz, CD3COCD3, δ, ppm): 136.7 (C-2), 124.5, 124.0 (C-4,5), 69.5(O–CH2), 50.3 (N–CH2), and 36.6 (N–CH3)","13C NMR conditions":"150\u2009MHz, CD3COCD3, δ, ppm","13C NMR data":"136.7, 124.5, 124.0, 69.5, 50.3, 36.6"}

Paragraphs:   1-(4-Methoxy-3-thiocyanatophenyl)ethyl acetate (2f). Yield 62% (156 mg). Obtained as a colorless oil; 1H NMR (CDCl3, 400 MHz) δ 1.51 (d, J = 6.8 Hz, 3H), 2.05 (s, 3H), 3.89 (s, 3H), 5.80 (q, J = 6.8 Hz, 1H), 6.88 (d, J = 8.3 Hz, 1H), 7.32 (dd, J = 8.3, 1.9 Hz, 1H), 7.53 (d, J = 2.0 Hz, 1H) ppm; 13C NMR (100 MHz, CDCl3) δ 21.3, 22.1, 56.3, 71.3, 110.3, 111.2, 113.3, 127.6, 128.

 28%|██▊       | 7/25 [00:44<01:52,  6.25s/it]

response:  {"IUPAC":"1-(4-Methoxy-3-thiocyanatophenyl)ethyl acetate","1H NMR text":"1H NMR (CDCl3, 400 MHz) δ 1.51 (d, J = 6.8 Hz, 3H), 2.05 (s, 3H), 3.89 (s, 3H), 5.80 (q, J = 6.8 Hz, 1H), 6.88 (d, J = 8.3 Hz, 1H), 7.32 (dd, J = 8.3, 1.9 Hz, 1H), 7.53 (d, J = 2.0 Hz, 1H) ppm","1H NMR conditions":"CDCl3, 400 MHz","1H NMR data":"1.51, 2.05, 3.89, 5.80, 6.88, 7.32, 7.53","13C NMR text":"13C NMR (100 MHz, CDCl3) δ 21.3, 22.1, 56.3, 71.3, 110.3, 111.2, 113.3, 127.6, 128.7, 135.8, 156.0, 170.2 ppm","13C NMR conditions":"CDCl3, 100 MHz","13C NMR data":"21.3, 22.1, 56.3, 71.3, 110.3, 111.2, 113.3, 127.6, 128.7, 135.8, 156.0, 170.2"}

Paragraphs:   N-(2-(piperidin-1-yl)ethyl)pyrazine-2-carboxamide (1d): white crystal; yield 84.0%; m.p. 185–187 °C; 1H NMR (CDCl3, 300 MHz): δ = 9.96 (1H, s, pyrazine-H), 9.10 (1H, dd, J = 7.6 Hz, pyrazine-H), 8.89 (1H, dd, J = 7.6 Hz, pyrazine-H), 8.81 (1H, s, –NH–), 3.36 (2H, s, J = 5.8 Hz, N–CH2–), 2.46 (2H, s, –CH2–), 2.42 (4H, m, –CH2–), 1.49 (4H, m, –CH2–), 

 32%|███▏      | 8/25 [00:54<02:03,  7.24s/it]

response:  {"IUPAC":"N-(2-(piperidin-1-yl)ethyl)pyrazine-2-carboxamide","1H NMR text":"1H NMR (CDCl3, 300 MHz): δ = 9.96 (1H, s, pyrazine-H), 9.10 (1H, dd, J = 7.6 Hz, pyrazine-H), 8.89 (1H, dd, J = 7.6 Hz, pyrazine-H), 8.81 (1H, s, –NH–), 3.36 (2H, s, J = 5.8 Hz, N–CH2–), 2.46 (2H, s, –CH2–), 2.42 (4H, m, –CH2–), 1.49 (4H, m, –CH2–), 1.37 (2H, m, –CH2–)","1H NMR conditions":"CDCl3, 300 MHz","1H NMR data":"9.96, 9.10, 8.89, 8.81, 3.36, 2.46, 2.42, 1.49, 1.37","13C NMR text":"13C NMR (CDCl3, 75 MHz): δ = 160.7 (C=O), 146.6, 145.0, 144.7, 144.6 (pyrazine-C), 57.2 (CH2, N–CH2), 56.8, 54.0, 25.9, 24.5 (CH2, CH2–CH2)","13C NMR conditions":"CDCl3, 75 MHz","13C NMR data":"160.7, 146.6, 145.0, 144.7, 144.6, 57.2, 56.8, 54.0, 25.9, 24.5"}

Paragraphs:  Sodium methanethiosulfonate (1.9 g, 14.37 mmol) and 4-bromobutanol (2.0 g, 13.07 mmol) were mixed together in anhydrous DMF (7 ml) under inert atmosphere (Scheme 1). The reaction was stirred at 60 °C for 5 h and was monitored by thin layer chroma

 36%|███▌      | 9/25 [00:57<01:36,  6.05s/it]

response:  {"IUPAC":"4-(methylsulfanyl)butan-1-ol","1H NMR text":"1H-NMR (300 MHz, DMSO-d6): δ = 4.45 (br s, 1H, OH collapsed with D2O), 3.48 (s, 3H, CH3), 3.42 (t, 2H, J = 8.7 Hz, CH 2OH), 3.19 (t, 2H, J = 6.9 Hz, SCH2), 1.76-1.66 (m, 2H, CH2), 1.52–1.43 (m, 2H, CH2) ppm","1H NMR conditions":"300 MHz, DMSO-d6","1H NMR data":"4.45, 3.48, 3.42, 3.19, 1.76-1.66, 1.52–1.43","13C NMR text":"13C-NMR (75 MHz, CDCl3): δ = 62.0, 50.9, 36.4, 31.3, 26.3","13C NMR conditions":"75 MHz, CDCl3","13C NMR data":"62.0, 50.9, 36.4, 31.3, 26.3"}

Paragraphs:  2.3.4. (E)-3-(2-(3,4-dichlorobenzylidene)hydrazinyl)quinoxalin-2(1H)-one (3d) Prepared according to general procedure A. Yield 67%, as a yellow solid, mp 244–246 °C, IR (KBr, cm−1) νmax = 3438 (N–H), 3042 (C–H, sp2), 1682 (C=O), 1568 and 1522 (C=N). 1H NMR (500 MHz, DMSO-d6) δ (ppm): 12.42 (s, 1H, NHCO). 11.39 (s, 1H, NH), 8.52 (s, 1H, CH); 8.28 (m, 2H, H-5′ and H-6′), 7.90 (m, 1H, H-2′), 7.20 (m, 4H, H-5 and H-6 and H-7 and H-8), 13C NMR (500 MHz, 

 40%|████      | 10/25 [01:02<01:25,  5.69s/it]

response:  {"IUPAC":"(E)-3-(2-(3,4-dichlorobenzylidene)hydrazinyl)quinoxalin-2(1H)-one","1H NMR text":"1H NMR (500 MHz, DMSO-d6) δ (ppm): 12.42 (s, 1H, NHCO). 11.39 (s, 1H, NH), 8.52 (s, 1H, CH); 8.28 (m, 2H, H-5′ and H-6′), 7.90 (m, 1H, H-2′), 7.20 (m, 4H, H-5 and H-6 and H-7 and H-8)","1H NMR conditions":"500 MHz, DMSO-d6","1H NMR data":"12.42, 11.39, 8.52, 8.28, 7.90, 7.20","13C NMR text":"13C NMR (500 MHz, DMSO-d6) δ (ppm): 172.0, 155.0, 150.7, 146.1, 143.8, 135.6, 131.7, 131.0, 127.9, 115.1, and 115.0","13C NMR conditions":"500 MHz, DMSO-d6","13C NMR data":"172.0, 155.0, 150.7, 146.1, 143.8, 135.6, 131.7, 131.0, 127.9, 115.1, 115.0"}

Paragraphs:   1-(4-Hydroxybenzylidene)-4-(5-nitrothiazol-2-yl)semicarbazide11: IR (KBr): ʋ = 3419.66 (OH str), 3325.39, 3192.30 (N–H str), 2931.90 (aromatic C–H str), 1670.41 (C=O str), 1599.04 (C=N str), 1458.23, 1388.79 (NO2 str), 1161.19 (C–N str); 1H NMR ((D6)DMSO, 300 MHz): δ = 4.96 (s, 1H, OH), 6.85 (d, J = 8.4 Hz, 2H, Ar C-3, Ar C-5), 7.42 (d,

 44%|████▍     | 11/25 [01:07<01:17,  5.56s/it]

response:  {"IUPAC":"1-(4-Hydroxybenzylidene)-4-(5-nitrothiazol-2-yl)semicarbazide","1H NMR text":"1H NMR ((D6)DMSO, 300 MHz): δ = 4.96 (s, 1H, OH), 6.85 (d, J = 8.4 Hz, 2H, Ar C-3, Ar C-5), 7.42 (d, J = 7.5 Hz, 2H, Ar C-2, Ar C-6), 8.09 (s, 1H, CH), 8.58 (s, 1H, thiazole C–H), 9.68 (s, 1H, NH), 10.08 ppm (s, 1H, CONH)","1H NMR conditions":"(D6)DMSO, 300 MHz","1H NMR data":"4.96, 6.85, 7.42, 8.09, 8.58, 9.68, 10.08","13C NMR text":"13C NMR ((D6)DMSO, 75 MHz): δ = 109.52 (thiazole C-5), 115.75 (Ar C-3, Ar C-5), 121.12 (Ar C-1), 122.54 (Ar C-2, Ar C-6), 141.07 (thiazole C-4), 144.66 (C=N), 158.33 (C=O), 161.09 (Ar C-4), 164.26 ppm (thiazole C-2)","13C NMR conditions":"(D6)DMSO, 75 MHz","13C NMR data":"109.52, 115.75, 121.12, 122.54, 141.07, 144.66, 158.33, 161.09, 164.26"}

Paragraphs:  3-Butyl-8a-methyl-4a,5-dihydrobenzo-1,2,4-trioxin-6(8aH)-one (3c) UV spectrum (MeOH), λ max, nm: 218.6; IR (KBr, cm−1) υ: 2923.67, 2888.36 (C–H, CH3), 1668.59 (C=O, conj.), 1177.34 (C–O), 886.56 (O–O); 1H

 48%|████▊     | 12/25 [01:13<01:11,  5.48s/it]

response:  {"IUPAC":"3-Butyl-8a-methyl-4a,5-dihydrobenzo-1,2,4-trioxin-6(8aH)-one","1H NMR text":"1H NMR (400 MHz, CDCl3) δ: 0.92 (t, J = 6.0 Hz, 3H, CH3), 1.18 (s, 3H, CH3), 1.46 (m, 2H, CH2–CH 2–CH3), 1.52 (m, 2H, CH 2–CH2–CH3), 1.63 (m, 2H, CH2–CH2–CH2–CH3), 2.52 (d, J = 3.0 Hz, 2H, CO–CH2–CH), 4.23 (d, J = 3.0 Hz, 1H, CO–CH2–CH), 6.06 (t, J = 6.0 Hz, 1H, CH–CH2–CH3), 6.09 (d, J = 10.0, 1H, CO–CH=CH), 6.34 (d, J = 9.0, 1H, CO–CH=CH)","1H NMR conditions":"400 MHz, CDCl3","1H NMR data":"0.92, 1.18, 1.46, 1.52, 1.63, 2.52, 4.23, 6.06, 6.09, 6.34","13C NMR text":"13C NMR (100 MHz, CDCl3) δ: 15.53, 18.18 (CH3), 22.43, 28.10, 32.62 (CH2), 36.36, 46.21 (CH), 86.54, 93.26 (CH=CH), 187.60 (C=O)","13C NMR conditions":"100 MHz, CDCl3","13C NMR data":"15.53, 18.18, 22.43, 28.10, 32.62, 36.36, 46.21, 86.54, 93.26, 187.60"}

Paragraphs:  (3R,4S,5R)-3,4,5-tri[(tert-butyldimethylsilyl)oxy]cyclohex-1-enecarboxylic acid (2) To a solution of (-)-shikimic acid 1 (1.0 g, 5.74 mmol) in anhydrous dimethyl

 52%|█████▏    | 13/25 [01:20<01:13,  6.09s/it]

response:  {"IUPAC":"(3R,4S,5R)-3,4,5-tri[(tert-butyldimethylsilyl)oxy]cyclohex-1-enecarboxylic acid","1H NMR text":"1H NMR (400 MHz, CDCl3): δ = 0.07 (s, 12H, CH3Si), 0.11 (s, 3H, CH3Si), 0.12 (s, 3H, CH3Si), 0.84 (s, 9H, (CH3)3CSi), 0.86 (s, 9H, (CH3)3CSi), 0.94 (s, 9H, (CH3)3CSi), 2.17 (dd, J = 17.8, 1.0 Hz, 1H, H-6), 2.58 (ddt, J = 17.8, 3.0, 3.0 Hz, 1H, H-6′), 3.74–3.78 (m, 1H, H-4), 3.98–4.02 (m, 1H, H-5), 4.62–4.66 (m, 1H, H-3), 6.82–6.85 (m, 1H, H-2) ppm","1H NMR conditions":"400 MHz, CDCl3","1H NMR data":"0.07, 0.11, 0.12, 0.84, 0.86, 0.94, 2.17, 2.58, 3.74–3.78, 3.98–4.02, 4.62–4.66, 6.82–6.85","13C NMR text":"13C NMR (100 MHz, CDCl3): δ = −4.7 (CH3Si), −4.6 (CH3Si), −4.5 (CH3Si), −4.2 (CH3Si), 18.1 (C(CH3)3), 18.3 (C(CH3)3), 18.7 (C(CH3)3), 25.9 (C(CH3)3), 26.0 (C(CH3)3), 26.4 (C(CH3)3), 29.4 (C-6), 68.4 (C-4), 70.0 (C-3), 72.6 (C-5), 126.7 (C-1), 143.3 (C-2), 172.9 (C=O) ppm","13C NMR conditions":"100 MHz, CDCl3","13C NMR data":"−4.7, −4.6, −4.5, −4.2, 18.1, 18.3, 18.7, 25.

 56%|█████▌    | 14/25 [01:27<01:10,  6.38s/it]

response:  {"IUPAC":"4-Cinnamamido-1-(3-fluorobenzyl) pyridin-1-ium bromide","1H NMR text":"1H NMR (400 MHz, DMSO-d6) δ 11.84 (s, 1H), 8.96 (d, J = 6.7 Hz, 2H), 8.23 (d, J = 6.5 Hz, 2H), 7.80 (d, J = 15.8 Hz, 1H), 7.70 (d, J = 4.9 Hz, 2H), 7.54–7.45 (m, 4H), 7.42 (d, J = 9.7 Hz, 1H), 7.34 (d, J = 7.6 Hz, 1H), 7.28 (t, J = 8.6 Hz, 1H), 7.00 (d, J = 15.8 Hz, 1H), 5.74 (s, 2H)","1H NMR conditions":"400 MHz, DMSO-d6","1H NMR data":"11.84, 8.96, 8.23, 7.80, 7.70, 7.54–7.45, 7.42, 7.34, 7.28, 7.00, 5.74","13C NMR text":"13C NMR (100 MHz, DMSO-d6) δ 165.93, 163.72 (d, 1JCF=246.44 Hz), 152.85, 145.78, 145.78, 144.60, 137.61 (d, 3JCF=7.58 Hz), 134.41, 131.80 (d, 3JCF=8.29 Hz), 131.34, 129.66, 129.66, 128.79, 128.79, 120.64, 116.51 (d, 2JCF=21.72 Hz), 116.07 (d, 2JCF=22.15 Hz), 115.80, 61.27","13C NMR conditions":"100 MHz, DMSO-d6","13C NMR data":"165.93, 163.72, 152.85, 145.78, 145.78, 144.60, 137.61, 134.41, 131.80, 131.34, 129.66, 129.66, 128.79, 128.79, 120.64, 116.51, 116.07, 115.80, 61.27"

 60%|██████    | 15/25 [01:35<01:08,  6.84s/it]

response:  {"IUPAC":"N-(cyclohexylcarbamothioyl)-2-oxo-2H-chromene-3-carboxamide","1H NMR text":"1H NMR (CDCl3, 300 MHz) δ/ppm: 1.24–1.49 (4H, s), 1.61–1.65 (2H, m), 1.73–1.78 (2H, m), 1.97–2.00 (2H, m), 3.94–4.00 (1H, m), 7.35–7.42 (2H, m), 7.63–7.71 (2H, m), 8.76 (1H, d, J = 6.7 Hz, NH), 8.9 (1H, s)","1H NMR conditions":"CDCl3, 300 MHz","1H NMR data":"1.24–1.49, 1.61–1.65, 1.73–1.78, 1.97–2.00, 3.94–4.00, 7.35–7.42, 7.63–7.71, 8.76, 8.9","13C NMR text":"13C NMR (CDCl3, 75 MHz) δ/ppm:, 24.9, 25.8, 32.9, 48.7, 116.8, 118.9, 125.4, 129.9, 134.1, 148.3, 150.9, 154.6, 160.6, 161.7","13C NMR conditions":"CDCl3, 75 MHz","13C NMR data":"24.9, 25.8, 32.9, 48.7, 116.8, 118.9, 125.4, 129.9, 134.1, 148.3, 150.9, 154.6, 160.6, 161.7"}

Paragraphs:  Synthesis of 3-(dimethylamino)-1-[4-(piperidin-1-yl) phenyl]prop-2-en-1-one (III) A mixture of 1-[4-(piperidin-1-yl) phenyl]ethan-1-one (I) (0.02 mol) and dimethylformamide-dimethylacetal (DMF-DMA) (II) (0.023 mol) was refluxed for 10 h without solvent

 64%|██████▍   | 16/25 [01:43<01:05,  7.24s/it]

response:  {"IUPAC":"3-(dimethylamino)-1-[4-(piperidin-1-yl) phenyl]prop-2-en-1-one","1H NMR text":"1H NMR (500 MHz, DMSO-d6): δ = 1.5 (6H, s, 3 × –CH2, piperidine), 2.89 (3H, s, NCH3), 3.09 (4H, s, 2 × –CH2, piperidine), 3.17 (3H, s, NCH3), 5.79 (1H, d, J = 12.5 Hz, = CH), 6.91 (2H, t, J = 9.0 Hz, Ar-H), 7.65 (1H, d, J = 12.5 Hz, =CH), 7.78 (2H, d, J = 8.5 Hz, Ar-H)","1H NMR conditions":"500 MHz, DMSO-d6","1H NMR data":"1.5, 2.89, 3.09, 3.17, 5.79, 6.91, 7.65, 7.78","13C NMR text":"13C NMR (125.76 MHz, DMSO-d6): δ = 24.4, 25.4, 48.8, 91.1, 113.9, 129.3, 129.6, 163.4, 163.5, 188.0","13C NMR conditions":"125.76 MHz, DMSO-d6","13C NMR data":"24.4, 25.4, 48.8, 91.1, 113.9, 129.3, 129.6, 163.4, 163.5, 188.0"}

Paragraphs:  7-(4-Chlorophenyl)-2-(3-methyl-5-oxo-2,3-dihydro-1H-pyrazol-1-yl)-5-(p-tolyl)pyrido-[2,3-d]pyrimidin-4(3H)-one (5a) Yield: 67%; mp: 385–387 °C; IR (KBr, cm−1) ν: 3422 (2NH), 1716 (2C=O); 1HNMR (400 MHz, DMSO-d6): δ 11.16, 11.66 (2s, 2H, 2NH), 8.21 (d, 2H), 8.18 (s, 1H, C

 68%|██████▊   | 17/25 [01:48<00:52,  6.61s/it]

response:  {"IUPAC":"7-(4-Chlorophenyl)-2-(3-methyl-5-oxo-2,3-dihydro-1H-pyrazol-1-yl)-5-(p-tolyl)pyrido-[2,3-d]pyrimidin-4(3H)-one","1H NMR text":"1HNMR (400 MHz, DMSO-d6): δ 11.16, 11.66 (2s, 2H, 2NH), 8.21 (d, 2H), 8.18 (s, 1H, C-4 pyrazolone), 7.57 (d, 2H, J = 8 Hz), 7.50 (s, 1H, C-6 pyridine), 7.30 (d, 2H, J = 8 Hz), 7.19 (d, 2H, J = 8 Hz), 2.35 (s, 3H, CH3), 1.89 (s, 3H, CH3)","1H NMR conditions":"400 MHz, DMSO-d6","1H NMR data":"11.16, 11.66, 8.21, 8.18, 7.57, 7.50, 7.30, 7.19, 2.35, 1.89","13C NMR text":"13CNMR (100 MHz, DMSO-d6): δ 172.44, 161.77, 157.81, 154.58, 153.98, 137.86, 136.26, 136.07,135.88, 129.63, 129.34, 129.09, 128.42, 118.39, 106.54, 21.50, 21.32","13C NMR conditions":"100 MHz, DMSO-d6","13C NMR data":"172.44, 161.77, 157.81, 154.58, 153.98, 137.86, 136.26, 136.07,135.88, 129.63, 129.34, 129.09, 128.42, 118.39, 106.54, 21.50, 21.32"}

Paragraphs:   Hydrazide 3 hydrochloride. Boc carbazate 8 (68 mg, 0.30 mmol, 1 equiv) was dissolved in dioxane (0.5 mL) and dilute

 72%|███████▏  | 18/25 [01:53<00:42,  6.13s/it]

response:  {"IUPAC":"acyl hydrazide 3 hydrochloride","1H NMR text":"1H NMR (DMSO-d6, 200 MHz) δ 11.01 (s, 1H, NH), 10.70–10.20 (brs, 3H, NH3), 2.22 (t, J = 7.2 Hz, 2H, CH2–C = O), 1.53 (pent, J = 7.1 Hz, 2H, CH2), 1.36–1.11 (m, 4H, 2CH2), 0.86 (t, J = 6.5 Hz, 3H, CH3)","1H NMR conditions":"DMSO-d6, 200 MHz","1H NMR data":"11.01, 10.70–10.20, 2.22, 1.53, 1.36–1.11, 0.86","13C NMR text":"13C NMR (DMSO-d6, 50 MHz) δ 171.72, 32.86, 30.72, 24.45, 21.90, 13.95","13C NMR conditions":"DMSO-d6, 50 MHz","13C NMR data":"171.72, 32.86, 30.72, 24.45, 21.90, 13.95"}

Paragraphs:  Commercially available 2-fluorobenzaldehyde (1) (0.45 g, 0.38 ml, 3.62 mmol) and K2CO3 (1.0 g, 7.24 mmol) were introduced in a sealed tube under an argon atmosphere. Anhydrous DMSO (0.38 ml) and a 2 M solution of (CH3)2NH in THF (2.0 ml, 4.0 mmol) were added and the mixture was heated to reflux temperature (about 100 °C) for 3 h. Then, after cooling, another aliquot (2.0 ml, 4.0 mmol) of 2 M solution of (CH3)2NH in THF was 

 76%|███████▌  | 19/25 [01:57<00:32,  5.34s/it]

response:  {"IUPAC":"2-(dimethylamino)benzaldehyde","1H NMR text":"1H NMR (400.13 MHz, CDCl3) δ 10.20 (s, 1H), 7.74 (dd, J 1 = 7.6 Hz, J2 = 1.6 Hz, 1H), 7.46–7.41 (m, 1H), 7.02 (d, J = 8.0 Hz, 1H), 6.97 (t, J = 7.6 Hz, 1H), 2.89 (s, 6H)","1H NMR conditions":"400.13 MHz, CDCl3","1H NMR data":"10.20, 7.74, 7.46–7.41, 7.02, 6.97, 2.89","13C NMR text":"13C NMR (100.6 MHz, CDCl3) δ 191.2, 155.9, 134.6, 131.1, 127.1, 120.8, 117.7, 45.5","13C NMR conditions":"100.6 MHz, CDCl3","13C NMR data":"191.2, 155.9, 134.6, 131.1, 127.1, 120.8, 117.7, 45.5"}

Paragraphs:  N4-(4-methoxyphenyl)-7H-pyrrolo[2,3-d]pyrimidine-4-amine (6) Compound 6 was synthesised as described for 1 with 4-methoxyaniline and was obtained as a white solid (90%), TLC Rf 0.52 (CH2Cl2/CH3OH, 10:1), 1H NMR (400 MHz DMSO-d6) δ 3.73 (s, 3 H), 6.64 (d, J = 4 Hz, 1 H), 6.91 (d, J = 8 Hz, 2 H), 7.15–7.16 (m, J = 4 Hz, 1 H), 7.69 (d, J = 8 Hz, 2 H), 8.18 (s, 1 H), 9.12 (s, 1 H), 13C NMR (400 MHz DMSO-d6) δ 155.26, 154.30, 151.36, 151.18

 80%|████████  | 20/25 [02:01<00:24,  4.91s/it]

response:  {"IUPAC":"N4-(4-methoxyphenyl)-7H-pyrrolo[2,3-d]pyrimidine-4-amine","1H NMR text":"1H NMR (400 MHz DMSO-d6) δ 3.73 (s, 3 H), 6.64 (d, J = 4 Hz, 1 H), 6.91 (d, J = 8 Hz, 2 H), 7.15–7.16 (m, J = 4 Hz, 1 H), 7.69 (d, J = 8 Hz, 2 H), 8.18 (s, 1 H), 9.12 (s, 1 H)","1H NMR conditions":"400 MHz DMSO-d6","1H NMR data":"3.73, 6.64, 6.91, 7.15–7.16, 7.69, 8.18, 9.12","13C NMR text":"13C NMR (400 MHz DMSO-d6) δ 155.26, 154.30, 151.36, 151.18, 133.76, 122.89, 122.16, 114.15, 103.66, 99.23","13C NMR conditions":"400 MHz DMSO-d6","13C NMR data":"155.26, 154.30, 151.36, 151.18, 133.76, 122.89, 122.16, 114.15, 103.66, 99.23"}

Paragraphs:  4-((1E,4E)-5-(2,5-dimethoxyphenyl)-3-oxopenta-1,4-dien-1-yl)benzenesulfonamide (5d)Yellow solid, yield 38%; mp: 202–203 °C; IR (KBr, cm−1): νmax 3364.1, 3258.2, 2943.2, 2836.0, 1654.0, 1597.9, 1326.5, 1158.5, 1096.1; 1H NMR (500 MHz, DMSO-d6): δ 7.98 (d, J = 8.6 Hz, 2H), 7.87 (d, J = 8.5 Hz, 2H), 7.83–7.77 (m, 3H), 7.40 (d, J = 16.0 Hz, 2H), 7.21 (d, J = 

 84%|████████▍ | 21/25 [02:06<00:20,  5.01s/it]

response:  {"IUPAC":"4-((1E,4E)-5-(2,5-dimethoxyphenyl)-3-oxopenta-1,4-dien-1-yl)benzenesulfonamide","1H NMR text":"1H NMR (500 MHz, DMSO-d6): δ 7.98 (d, J = 8.6 Hz, 2H), 7.87 (d, J = 8.5 Hz, 2H), 7.83–7.77 (m, 3H), 7.40 (d, J = 16.0 Hz, 2H), 7.21 (d, J = 16.1 Hz, 1H), 7.08–7.04 (m, 3H), 3.85 (s, 3H), 3.79 (s, 3H)","1H NMR conditions":"500 MHz, DMSO-d6","1H NMR data":"7.98, 7.87, 7.83–7.77, 7.40, 7.21, 7.08–7.04, 3.85, 3.79","13C NMR text":"13C NMR (125 MHz, DMSO-d6): δ 188.7, 153.7, 153.2, 144.3, 143.3, 141.3, 138.0, 129.3, 126.6, 123.5, 118.5, 113.6, 113.5, 113.2, 56.5, 56.1","13C NMR conditions":"125 MHz, DMSO-d6","13C NMR data":"188.7, 153.7, 153.2, 144.3, 143.3, 141.3, 138.0, 129.3, 126.6, 123.5, 118.5, 113.6, 113.5, 113.2, 56.5, 56.1"}

Paragraphs:  3-(4-(Cyclopentyloxy)phenyl)-1-(4-methylphenyl)prop-2-en-1-one (4b)Crystallisation solvent, water; Yield, 85%; mp: 220–222 °C; IR (KBr) νmax/cm−1 1615 (C = O), 1540, 1535, 1525, 1480 (C = C). 1H NMR (CDCl3); δ: 7.85 (d, 4H, Ar-H, J = 

 88%|████████▊ | 22/25 [02:13<00:17,  5.74s/it]

response:  {"IUPAC":"3-(4-(Cyclopentyloxy)phenyl)-1-(4-methylphenyl)prop-2-en-1-one","1H NMR text":"1H NMR (CDCl3); δ: 7.85 (d, 4H, Ar-H, J = 8 Hz), 7.32 (d, 4H, Ar-H, J = 8 Hz), 7.18 (d, 1H, CH = CH, J = 8.4 Hz), 6.73 (d, 1H, CH = CH, J = 8.4 Hz), 4.80–4.72 (m, 1H, CH), 2.36 (s, 3H, CH3), 1.85–1.80 (m, 2H, CH2), 1.70–1.62 (m, 4H, 2CH2), 1.60–1.50 (m, 2H, CH2)","1H NMR conditions":"CDCl3","1H NMR data":"7.85, 7.32, 7.18, 6.73, 4.80–4.72, 2.36, 1.85–1.80, 1.70–1.62, 1.60–1.50","13C NMR text":"13C NMR (DMSO-d6); δ: 198.2, 155.8, 143.3, 135.6, 134.3, 129.1, 128.4, 128.0, 114.8, 78.3, 32.2, 23.5, 21.0","13C NMR conditions":"DMSO-d6","13C NMR data":"198.2, 155.8, 143.3, 135.6, 134.3, 129.1, 128.4, 128.0, 114.8, 78.3, 32.2, 23.5, 21.0"}

Paragraphs:   7-(9H-Carbazol-9-yl)heptane-1-thiol (5a). Yellow oil (17 mg, 25%). 1H NMR (400 MHz, CDCl3) δ: 8.14 (d, J = 7.6 Hz, 2H, Ar), 7.51–7.48 (m, 2H, Ar), 7.43 (d, J = 8.1 Hz, 2H, Ar), 7.27 (m, 2H, Ar), 4.33–4.31 (m, 2H, NCH2), 2.66–2.63 (m, 2H, CH2S),

 92%|█████████▏| 23/25 [02:18<00:10,  5.47s/it]

response:  {"IUPAC":"7-(9H-Carbazol-9-yl)heptane-1-thiol","1H NMR text":"1H NMR (400 MHz, CDCl3) δ: 8.14 (d, J = 7.6 Hz, 2H, Ar), 7.51–7.48 (m, 2H, Ar), 7.43 (d, J = 8.1 Hz, 2H, Ar), 7.27 (m, 2H, Ar), 4.33–4.31 (m, 2H, NCH2), 2.66–2.63 (m, 2H, CH2S), 1.92–1.86 (m, 2H, NCH2CH2), 1.66–1.64 (m, 2H, HSCH2CH2), 1.41–1.37 (m, 7H, 3CH2, SH) ppm","1H NMR conditions":"400 MHz, CDCl3","1H NMR data":"8.14, 7.51–7.48, 7.43, 7.27, 4.33–4.31, 2.66–2.63, 1.92–1.86, 1.66–1.64, 1.41–1.37","13C NMR text":"13C NMR (100 MHz, CDCl3) δ: 140.73, 126.45, 124.15, 121.68, 120.48, 109.63, 47.43, 33.25, 28.96, 28.61, 27.95, 27.55, 25.38 ppm","13C NMR conditions":"100 MHz, CDCl3","13C NMR data":"140.73, 126.45, 124.15, 121.68, 120.48, 109.63, 47.43, 33.25, 28.96, 28.61, 27.95, 27.55, 25.38"}

Paragraphs:  3-(3-Methoxyphenyl)benzo[4,5]imidazo[2,1-b]thiazole (4a)White crystals (yield 80%), m.p. 173–175 °C; 1H NMR (DMSO-d6) δ ppm: 3.84 (s, 3H, OCH3), 7.13 (d, 1H, Ar–H, J = 7.5 Hz), 7.20–7.32 (m, 6H, Ar–H), 7.54 (t, 1

 96%|█████████▌| 24/25 [02:23<00:05,  5.14s/it]

response:  {"IUPAC":"3-(3-Methoxyphenyl)benzo[4,5]imidazo[2,1-b]thiazole","1H NMR text":"1H NMR (DMSO-d6) δ ppm: 3.84 (s, 3H, OCH3), 7.13 (d, 1H, Ar–H, J = 7.5 Hz), 7.20–7.32 (m, 6H, Ar–H), 7.54 (t, 1H, Ar–H, J = 8.0 Hz), 7.71 (d, 1H, Ar–H, J = 8.0 Hz)","1H NMR conditions":"DMSO-d6","1H NMR data":"3.84, 7.13, 7.20–7.32, 7.54, 7.71","13C NMR text":"13C NMR (DMSO-d6) δ ppm: 55.86 (OCH3), 109.23, 112.01, 114.68, 116.44, 119.23, 120.86, 121.47, 123.61, 130.61, 130.69, 133.66, 148.64, 157.03, 159.92","13C NMR conditions":"DMSO-d6","13C NMR data":"55.86, 109.23, 112.01, 114.68, 116.44, 119.23, 120.86, 121.47, 123.61, 130.61, 130.69, 133.66, 148.64, 157.03, 159.92"}

Paragraphs:   2-(1H-imidazol-4-yl)-N-(4-methylbenzylidene)ethanamine (H3): Yield: 72%; colour: white powder, mp: 190–193 °C; FT-IR (cm−1): 1640 (–C=N–); 1H NMR (DMSO-d6, 300 MHz, δ ppm): 8.62 (s, 1H, –N=CH–), 7.88 (d, 1H, J = 1.0, H-2 Im), 7.78 (d, 2H, J = 8.2, Ar-H), 7.35 (s, 1H, H-5 Im), 7.28 (d, 2H, J = 8.2, Ar-H), 3.65 (t, 2H

100%|██████████| 25/25 [02:27<00:00,  5.92s/it]

response:  {"IUPAC":"2-(1H-imidazol-4-yl)-N-(4-methylbenzylidene)ethanamine","1H NMR text":"1H NMR (DMSO-d6, 300 MHz, δ ppm): 8.62 (s, 1H, –N=CH–), 7.88 (d, 1H, J = 1.0, H-2 Im), 7.78 (d, 2H, J = 8.2, Ar-H), 7.35 (s, 1H, H-5 Im), 7.28 (d, 2H, J = 8.2, Ar-H), 3.65 (t, 2H, J = 6.2, –CH2CH2-Im), 2.90 (t, 2H, J = 6.2, –CH2CH2-Im), 1.85 (s, 3H, –CH3)","1H NMR conditions":"DMSO-d6, 300 MHz, δ ppm","1H NMR data":"8.62, 7.88, 7.78, 7.35, 7.28, 3.65, 2.90, 1.85","13C NMR text":"13C NMR (DMSO-d6, 75 MHz, δ ppm): 166.41 (–N=CH–), 163.82, 133.51, 132.93, 131.65, 116.31, 115.64, 114.73, 111.42, 55.24, 26.72, 18.38","13C NMR conditions":"DMSO-d6, 75 MHz, δ ppm","13C NMR data":"166.41, 163.82, 133.51, 132.93, 131.65, 116.31, 115.64, 114.73, 111.42, 55.24, 26.72, 18.38"}





# 7. Save results

In [None]:
# Split each predicted JSON dict into one CSV row.

# Column order of the output CSV; each name is also the key looked up in the
# parsed prediction dict.
result_columns = ['IUPAC', '1H NMR text', '1H NMR conditions', '1H NMR data', '13C NMR text', '13C NMR conditions', '13C NMR data']

# The `with` block guarantees the file is closed even if a prediction is not
# valid JSON or lacks one of the expected keys (both still raise, as before).
with open(prediction_result_path, "a+", encoding='utf-8', newline='') as csv_fp:
    writer = csv.writer(csv_fp)
    # Append mode starts at the end of the file, so position 0 means the file
    # is new or empty. Write the header only then; otherwise a re-run would put
    # a second header line in the middle of the data.
    if csv_fp.tell() == 0:
        writer.writerow(result_columns)

    # NOTE: rows are appended. Delete the file before re-running if a fresh
    # result set is wanted, since the evaluation step requires the prediction
    # count to equal the ground-truth count.
    for row in test_preds:
        prediction = json.loads(row)
        writer.writerow([prediction[column] for column in result_columns])

# 8. Evaluate

In [12]:
### Task 4: Paragraph2NMR

# Each list below holds one CSV column, in row order. The ground-truth and
# prediction lists are compared element by element in the evaluation step.
# The "*_solvent" lists hold the full "... NMR conditions" column.
# Each column is read exactly once: the previous row loops appended the
# '13C NMR text' value twice per row, doubling those two lists.

# ground truth
ground_truth = pd.read_csv(r'test_300.csv',encoding='utf-8')
# Only the first 25 rows are evaluated (presumably the 25 paragraphs that were
# sent to the model -- confirm if the test size changes). Missing cells are
# read as NaN, so they are replaced with a string placeholder to keep every
# value comparable as text.
ground_truth = ground_truth[:25].fillna('N/A')

ground_truth_IUPAC = ground_truth['IUPAC'].tolist()
ground_truth_1H_NMR = ground_truth['1H NMR text'].tolist()
ground_truth_1H_NMR_solvent = ground_truth['1H NMR conditions'].tolist()
ground_truth_1H_NMR_data = ground_truth['1H NMR data'].tolist()
ground_truth_13C_NMR = ground_truth['13C NMR text'].tolist()
ground_truth_13C_NMR_solvent = ground_truth['13C NMR conditions'].tolist()
ground_truth_13C_NMR_data = ground_truth['13C NMR data'].tolist()

# prediction
prediction_df = pd.read_csv(prediction_result_path)
# Same placeholder as the ground truth so that "missing" matches "missing".
prediction_df = prediction_df.fillna('N/A')

predict_IUPAC = prediction_df['IUPAC'].tolist()
predict_1HNMR = prediction_df['1H NMR text'].tolist()
predict_1HNMR_solvent = prediction_df['1H NMR conditions'].tolist()
predict_1HNMR_data = prediction_df['1H NMR data'].tolist()
predict_13CNMR = prediction_df['13C NMR text'].tolist()
predict_13CNMR_solvent = prediction_df['13C NMR conditions'].tolist()
predict_13CNMR_data = prediction_df['13C NMR data'].tolist()

def levenshtein_similarity(truth: List[str], pred: List[str]) -> float:
    """Return the mean normalized Levenshtein similarity of paired strings.

    ``truth[i]`` is compared with ``pred[i]`` using
    ``textdistance.levenshtein.normalized_similarity`` and the per-pair
    scores are averaged over ``len(truth)``.

    Both lists must have the same length (checked with ``assert``).
    """
    pair_count = len(truth)
    assert pair_count == len(pred)

    total = 0
    for expected, predicted in zip(truth, pred):
        total += textdistance.levenshtein.normalized_similarity(expected, predicted)
    return total / pair_count

def full_sentence_accuracy(truth: List[str], pred: List[str]) -> float:
    """Return the fraction of positions where prediction equals ground truth.

    ``truth[i]`` is compared with ``pred[i]`` using ``==``; the number of
    exact matches is divided by ``len(truth)``.

    Both lists must have the same length (checked with ``assert``).
    """
    pair_count = len(truth)
    assert pair_count == len(pred)

    exact_matches = 0
    for expected, predicted in zip(truth, pred):
        if expected == predicted:
            exact_matches += 1
    return exact_matches / pair_count

# Evaluation
# One entry per extracted field: (label used in the printed metric name,
# ground-truth list, prediction list).
evaluated_fields = [
    ("IUPAC", ground_truth_IUPAC, predict_IUPAC),
    ("1HNMR_text", ground_truth_1H_NMR, predict_1HNMR),
    ("1HNMR_conditions", ground_truth_1H_NMR_solvent, predict_1HNMR_solvent),
    ("1HNMR_data", ground_truth_1H_NMR_data, predict_1HNMR_data),
    ("13CNMR_text", ground_truth_13C_NMR, predict_13CNMR),
    ("13CNMR_conditions", ground_truth_13C_NMR_solvent, predict_13CNMR_solvent),
    ("13CNMR_data", ground_truth_13C_NMR_data, predict_13CNMR_data),
]

# First all similarity scores, then a blank line, then all exact-match scores.
for field_label, expected_values, predicted_values in evaluated_fields:
    print(f"levenshtein_similarity_of_{field_label}: ", levenshtein_similarity(expected_values, predicted_values))
print()
for field_label, expected_values, predicted_values in evaluated_fields:
    print(f"full_sentence_accuracy_of_{field_label}: ", full_sentence_accuracy(expected_values, predicted_values))

levenshtein_similarity_of_IUPAC:  0.9155886049434437
levenshtein_similarity_of_1HNMR_text:  0.989070677192862
levenshtein_similarity_of_1HNMR_conditions:  0.9706946778711485
levenshtein_similarity_of_1HNMR_data:  0.9975917312661499
levenshtein_similarity_of_13CNMR_text:  0.9962408081442666
levenshtein_similarity_of_13CNMR_conditions:  0.9359047619047618
levenshtein_similarity_of_13CNMR_data:  0.9987763007907716

full_sentence_accuracy_of_IUPAC:  0.84
full_sentence_accuracy_of_1HNMR_text:  0.52
full_sentence_accuracy_of_1HNMR_conditions:  0.56
full_sentence_accuracy_of_1HNMR_data:  0.92
full_sentence_accuracy_of_13CNMR_text:  0.6
full_sentence_accuracy_of_13CNMR_conditions:  0.6
full_sentence_accuracy_of_13CNMR_data:  0.88
