# Defining Types of Articles

In [1]:
systematic_meta_analysis = [
    "Meta-Analysis",
    "Network Meta-Analysis",
    "Systematic Review"
    ]
   

In [2]:
clinical_trial = [
    "Adaptive Clinical Trial",
    "Clinical Study",
    "Clinical Trial",
    "Clinical Trial Protocol",
    "Clinical Trial, Phase I",
    "Clinical Trial, Phase II",
    "Clinical Trial, Phase III",
    "Clinical Trial, Phase IV",
    "Clinical Trial, Veterinary",
    "Controlled Clinical Trial",
    "Equivalence Trial",
    "Multicenter Study",
    "Pragmatic Clinical Trial",
    "Randomized Controlled Trial",
    "Randomized Controlled Trial, Veterinary"
]


In [3]:
observational_studies = [
    "Case Reports",
    "Comparative Study",
    "Evaluation Study",
    "Observational Study",
    "Observational Study, Veterinary",
    "Twin Study",
    "Validation Study",
    "Review",
    "Scientific Integrity Review",
    "Scoping Review"
]


In [4]:
# Initialize an empty list
all_study_types = []

# Extend the list with other lists
all_study_types.extend(systematic_meta_analysis)
all_study_types.extend(clinical_trial)
all_study_types.extend(observational_studies)

allowed_study_types = all_study_types

# Print the combined list
print("Total Study types:",len(all_study_types))




Total Study types: 28


# Loading Data

In [5]:
import pandas as pd

# Read Excel file
df = pd.read_excel('Data/entrez_ingredients_with_synonyms.xlsx')  # For .xls or .xlsx

# View first 5 rows
df.head()

Unnamed: 0,Root Name,PMID,Title,Article Type,Publication Year,PubMed URL,Search Term(s)
0,Acetyl L-Carnitine,10022226,Micronutrients prevent cancer and delay aging.,"Journal Article; Research Support, Non-U.S. Go...",1998,https://pubmed.ncbi.nlm.nih.gov/10022226/,Carnitine
1,Acetyl L-Carnitine,10030388,Significance of skeletal muscle properties on ...,Comparative Study; Journal Article; Research S...,1999,https://pubmed.ncbi.nlm.nih.gov/10030388/,Carnitine
2,Acetyl L-Carnitine,10036643,Cellular dysmetabolism: the dark side of HIV-1...,Journal Article; Review,1996,https://pubmed.ncbi.nlm.nih.gov/10036643/,Carnitine
3,Acetyl L-Carnitine,10052020,Substrate utilization and work efficiency duri...,Clinical Trial; Controlled Clinical Trial; Jou...,1999,https://pubmed.ncbi.nlm.nih.gov/10052020/,Carnitine
4,Acetyl L-Carnitine,10067662,L-carnitine improves glucose disposal in type ...,Clinical Trial; Controlled Clinical Trial; Jou...,1999,https://pubmed.ncbi.nlm.nih.gov/10067662/,Carnitine


In [6]:
df.shape

(277397, 7)

# Adding Synonyms

In [6]:
import numpy as np

synonyms_df = pd.read_excel("76 Ingredient List.xlsx")
synonyms_df['Common Name(s)'] = synonyms_df['Common Name(s)'].astype(str).str.replace(',', ';')
synonyms_df['Other Search Words'] = synonyms_df['Other Search Words'].astype(str).str.replace(',', ';')
synonyms_df = synonyms_df.replace(['nan', np.nan], '') 
synonyms_df['Synonyms'] = (
    synonyms_df['Common Name(s)'].fillna('')
    .str.strip() + '; ' +
    synonyms_df['Scientific Name(s)'].fillna('')
    .str.strip() + '; ' +
    synonyms_df['Other Search Words'].fillna('')
    .str.strip()
)
synonyms_df.head()

Unnamed: 0,Ingredient,Common Name(s),Scientific Name(s),Other Search Words,Synonyms
0,Ashwagandha,Ashwagandha; Indian Ginseng; Winter Cherry,Withania somnifera,Withanolide; Withanolides,Ashwagandha; Indian Ginseng; Winter Cherry; Wi...
1,Chamomile,Chamomile; English Chamomile; German Chamomile...,Matricaria chamomilla; Chamaemelum nobile; Mat...,,Chamomile; English Chamomile; German Chamomile...
2,GABA,GABA,Gamma-Aminobutyric Acid,,GABA; Gamma-Aminobutyric Acid;
3,Glycine,Glycine; L-Glycine,Aminoacetic Acid,,Glycine; L-Glycine; Aminoacetic Acid;
4,Lemon Balm,Melissa; Balm Mint; Lemon Balm Extract,Melissa officinalis,,Melissa; Balm Mint; Lemon Balm Extract; Meliss...


In [7]:
df_merged = df.merge(
    synonyms_df[['Ingredient', 'Synonyms']],  # only the columns we need
    left_on='Root Name',               # key in df
    right_on='Ingredient',             # key in synonyms_df
    how='left'                         # keep all rows from df
)

# Drop duplicate 'Ingredient' column
df = df_merged.drop(columns=['Ingredient'])
df.head()

Unnamed: 0,Root Name,PMID,Title,Article Type,Publication Year,PubMed URL,Search Term(s),Synonyms
0,Acetyl L-Carnitine,10022226,Micronutrients prevent cancer and delay aging.,"Journal Article; Research Support, Non-U.S. Go...",1998,https://pubmed.ncbi.nlm.nih.gov/10022226/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...
1,Acetyl L-Carnitine,10030388,Significance of skeletal muscle properties on ...,Comparative Study; Journal Article; Research S...,1999,https://pubmed.ncbi.nlm.nih.gov/10030388/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...
2,Acetyl L-Carnitine,10036643,Cellular dysmetabolism: the dark side of HIV-1...,Journal Article; Review,1996,https://pubmed.ncbi.nlm.nih.gov/10036643/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...
3,Acetyl L-Carnitine,10052020,Substrate utilization and work efficiency duri...,Clinical Trial; Controlled Clinical Trial; Jou...,1999,https://pubmed.ncbi.nlm.nih.gov/10052020/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...
4,Acetyl L-Carnitine,10067662,L-carnitine improves glucose disposal in type ...,Clinical Trial; Controlled Clinical Trial; Jou...,1999,https://pubmed.ncbi.nlm.nih.gov/10067662/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...


# Defining Neuraci Type

In [8]:
# Define mapping of NeuraciTypes
priority_mapping = [
    (systematic_meta_analysis,'Systematic Review & Meta Analysis'), # 1st Priority
    (clinical_trial, 'Clinical Trials'),                            # 2nd Priority
    (observational_studies, 'Observational Studies & Reviews'),     # 3rd Priority

]

# Function to assign NeuraciType based on priority
def classify_type(type_list):
    for reference_list, category in priority_mapping:
        if any(item in reference_list for item in type_list):
            return category
    return 'Unclassified'  # optional, in case nothing matches

# Function to assign NeuraciType based on priority and return the matched item
# def classify_type(type_list):
#     for reference_list, category in priority_mapping:
#         for item in type_list:
#             if item in reference_list:
#                 return category, item  # Return both category and matched item
#     return 'Unclassified', None  # Default if nothing matches


# Create new column with a list of article types
df['Type_list'] = df['Article Type'].apply(lambda x: set([item.strip() for item in x.split(';')]))


# Apply function to create new column
#df['PubMedType'] = df['Type_list'].apply(lambda x: x.intersection(set(allowed_study_types)))
df['PubMedType'] = df['Type_list'].apply(
    lambda x: ', '.join(x.intersection(set(allowed_study_types))) if isinstance(x, set) else ''
)
df["NeuraciType"] = df['Type_list'].apply(lambda x: pd.Series(classify_type(x)))

df.head()

Unnamed: 0,Root Name,PMID,Title,Article Type,Publication Year,PubMed URL,Search Term(s),Synonyms,Type_list,PubMedType,NeuraciType
0,Acetyl L-Carnitine,10022226,Micronutrients prevent cancer and delay aging.,"Journal Article; Research Support, Non-U.S. Go...",1998,https://pubmed.ncbi.nlm.nih.gov/10022226/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...,"{Journal Article, Review, Research Support, No...",Review,Observational Studies & Reviews
1,Acetyl L-Carnitine,10030388,Significance of skeletal muscle properties on ...,Comparative Study; Journal Article; Research S...,1999,https://pubmed.ncbi.nlm.nih.gov/10030388/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...,"{Journal Article, Comparative Study, Research ...",Comparative Study,Observational Studies & Reviews
2,Acetyl L-Carnitine,10036643,Cellular dysmetabolism: the dark side of HIV-1...,Journal Article; Review,1996,https://pubmed.ncbi.nlm.nih.gov/10036643/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...,"{Journal Article, Review}",Review,Observational Studies & Reviews
3,Acetyl L-Carnitine,10052020,Substrate utilization and work efficiency duri...,Clinical Trial; Controlled Clinical Trial; Jou...,1999,https://pubmed.ncbi.nlm.nih.gov/10052020/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...,"{Journal Article, Research Support, Non-U.S. G...","Controlled Clinical Trial, Clinical Trial",Clinical Trials
4,Acetyl L-Carnitine,10067662,L-carnitine improves glucose disposal in type ...,Clinical Trial; Controlled Clinical Trial; Jou...,1999,https://pubmed.ncbi.nlm.nih.gov/10067662/,Carnitine,Carnitine; ALCAR; Acetylcarnitine; Acetyl L Ca...,"{Journal Article, Controlled Clinical Trial, C...","Controlled Clinical Trial, Clinical Trial",Clinical Trials


In [10]:
ingredients = ["Cedarwood"]


In [20]:
from UPDATED_meta_data_generation import process_pmids

#ingredient_df = df[df['Root Name'].isin([10473175])]
ingredient_df = df[df['Root Name'].isin(ingredients)]
#ingredient_df = df[(df['Root Name'].isin(ingredients)) & (df['NeuraciType'] == 'Clinical Trials')]
#ingredient_df = duplicates_df[duplicates_df['PMID'].isin(pmids)]
process_pmids(ingredient_df['Root Name'],ingredient_df['Search Term(s)'],ingredient_df['Synonyms'],ingredient_df['PMID'], ingredient_df['PubMedType'],output_file="cedarwood_10|7.json") 

Processing PMIDs:   0%|          | 0/97 [00:00<?, ?pmid/s]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   1%|          | 1/97 [00:07<11:31,  7.21s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   2%|▏         | 2/97 [00:12<09:12,  5.82s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   3%|▎         | 3/97 [00:18<09:50,  6.28s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   4%|▍         | 4/97 [00:29<12:13,  7.89s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   5%|▌         | 5/97 [00:34<10:53,  7.11s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   6%|▌         | 6/97 [00:39<09:24,  6.20s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   7%|▋         | 7/97 [00:46<09:48,  6.54s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   8%|▊         | 8/97 [00:51<08:51,  5.97s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:   9%|▉         | 9/97 [00:55<07:55,  5.40s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  10%|█         | 10/97 [01:09<11:34,  7.98s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  11%|█▏        | 11/97 [01:17<11:25,  7.97s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  12%|█▏        | 12/97 [01:28<12:34,  8.88s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  13%|█▎        | 13/97 [01:34<11:26,  8.18s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  14%|█▍        | 14/97 [01:39<09:57,  7.20s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  15%|█▌        | 15/97 [01:46<09:31,  6.97s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  16%|█▋        | 16/97 [01:51<08:55,  6.61s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  18%|█▊        | 17/97 [01:55<07:47,  5.84s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  19%|█▊        | 18/97 [02:01<07:33,  5.74s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  20%|█▉        | 19/97 [02:05<06:52,  5.28s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  21%|██        | 20/97 [02:08<06:00,  4.68s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  22%|██▏       | 21/97 [02:11<05:06,  4.04s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  23%|██▎       | 22/97 [02:23<07:52,  6.30s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  24%|██▎       | 23/97 [02:27<07:09,  5.80s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  25%|██▍       | 24/97 [02:37<08:27,  6.95s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  26%|██▌       | 25/97 [02:46<09:11,  7.66s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  27%|██▋       | 26/97 [02:58<10:25,  8.82s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  28%|██▊       | 27/97 [03:02<08:32,  7.32s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  30%|██▉       | 29/97 [03:07<05:30,  4.85s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  31%|███       | 30/97 [03:11<04:55,  4.41s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  32%|███▏      | 31/97 [03:18<05:42,  5.19s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  33%|███▎      | 32/97 [03:31<08:14,  7.61s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  34%|███▍      | 33/97 [03:34<06:36,  6.20s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  36%|███▌      | 35/97 [03:44<05:24,  5.23s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  37%|███▋      | 36/97 [03:48<04:56,  4.87s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  39%|███▉      | 38/97 [03:53<03:31,  3.59s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  40%|████      | 39/97 [03:58<03:42,  3.83s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  41%|████      | 40/97 [04:12<06:36,  6.96s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  42%|████▏     | 41/97 [04:16<05:44,  6.14s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  44%|████▍     | 43/97 [04:21<03:38,  4.04s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  45%|████▌     | 44/97 [04:30<05:01,  5.68s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  46%|████▋     | 45/97 [04:35<04:42,  5.42s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  47%|████▋     | 46/97 [04:53<07:44,  9.11s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  48%|████▊     | 47/97 [04:59<06:47,  8.14s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  49%|████▉     | 48/97 [05:13<08:16, 10.14s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  51%|█████     | 49/97 [05:17<06:33,  8.20s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  52%|█████▏    | 50/97 [05:30<07:34,  9.67s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  53%|█████▎    | 51/97 [05:33<05:57,  7.76s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  54%|█████▎    | 52/97 [05:41<05:50,  7.80s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  55%|█████▍    | 53/97 [05:54<06:51,  9.36s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  56%|█████▌    | 54/97 [06:01<06:01,  8.40s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  57%|█████▋    | 55/97 [06:04<04:55,  7.04s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  58%|█████▊    | 56/97 [06:11<04:49,  7.05s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  59%|█████▉    | 57/97 [06:21<05:08,  7.70s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  60%|█████▉    | 58/97 [06:23<04:02,  6.22s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  61%|██████    | 59/97 [06:29<03:53,  6.14s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  62%|██████▏   | 60/97 [06:43<05:12,  8.44s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  63%|██████▎   | 61/97 [06:48<04:27,  7.42s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  64%|██████▍   | 62/97 [06:52<03:40,  6.29s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  65%|██████▍   | 63/97 [06:56<03:09,  5.57s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  68%|██████▊   | 66/97 [07:05<01:47,  3.47s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  69%|██████▉   | 67/97 [07:09<01:47,  3.60s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  70%|███████   | 68/97 [07:13<01:50,  3.82s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  71%|███████   | 69/97 [07:19<01:58,  4.24s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  72%|███████▏  | 70/97 [07:23<02:00,  4.45s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  73%|███████▎  | 71/97 [07:27<01:51,  4.31s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  74%|███████▍  | 72/97 [07:32<01:47,  4.28s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  75%|███████▌  | 73/97 [07:47<03:00,  7.53s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  76%|███████▋  | 74/97 [07:51<02:29,  6.51s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  77%|███████▋  | 75/97 [08:00<02:38,  7.20s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  78%|███████▊  | 76/97 [08:04<02:12,  6.30s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  79%|███████▉  | 77/97 [08:11<02:11,  6.60s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  80%|████████  | 78/97 [08:16<01:54,  6.01s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  81%|████████▏ | 79/97 [08:21<01:45,  5.88s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  82%|████████▏ | 80/97 [08:25<01:26,  5.07s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  84%|████████▎ | 81/97 [08:30<01:20,  5.05s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  85%|████████▍ | 82/97 [08:33<01:08,  4.59s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  86%|████████▌ | 83/97 [08:37<00:59,  4.25s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  87%|████████▋ | 84/97 [08:43<01:02,  4.82s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  88%|████████▊ | 85/97 [08:50<01:07,  5.63s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  89%|████████▊ | 86/97 [08:55<00:57,  5.24s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  90%|████████▉ | 87/97 [09:06<01:10,  7.07s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  91%|█████████ | 88/97 [09:11<00:58,  6.47s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  92%|█████████▏| 89/97 [09:14<00:43,  5.41s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  93%|█████████▎| 90/97 [09:16<00:31,  4.55s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  94%|█████████▍| 91/97 [09:20<00:25,  4.24s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  95%|█████████▍| 92/97 [09:33<00:34,  6.81s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  96%|█████████▌| 93/97 [09:36<00:23,  5.87s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  97%|█████████▋| 94/97 [09:40<00:15,  5.12s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs:  99%|█████████▉| 96/97 [09:47<00:04,  4.02s/pmid]

Synonyms Cedarwood; Cedrus; 


Processing PMIDs: 100%|██████████| 97/97 [09:51<00:00,  6.10s/pmid]


[{'root_name': 'Cedarwood',
  'search_term': 'Cedrus',
  'synonyms': 'Cedarwood; Cedrus; ',
  'PMID': 10473175,
  'pubmed_type': 'Comparative Study',
  'metadata': {'duration_days': None,
   'sample_size': None,
   'sample_gender': ['unspecified'],
   'species': ['other'],
   'population': 'turkish plants used in folk medicine for gastric ailments',
   'study_type': ['unspecified'],
   'focus': ['secondary'],
   'benefits': ['anti-helicobacter pylori effect'],
   'synergies_interactions_positive': [],
   'synergies_interactions_negative': [],
   'safety_side_effects': [],
   'interventions': [{'ingredient': 'cedrus libani',
     'daily_dosage': None,
     'units': '',
     'original_text': 'cones of cedrus libani'}],
   'usage': ['unspecified'],
   'conditions': ['gastric ailments', 'peptic ulcers'],
   'biomarkers': [],
   'functions': [],
   'purpose': 'to study the anti-helicobacter pylori effect of extracts from turkish plants used in folk medicine for gastric ailments.',
   'concl

# Enriching Data

In [23]:
import json

ingredient_df_lookup = ingredient_df.set_index(["PMID",'Root Name'])[["Publication Year","NeuraciType","Synonyms","PubMed URL"]].to_dict(orient="index")


# Read JSON file
with open("cedarwood_10|7.json", "r", encoding="utf-8") as f:
    json_list = json.load(f)

# Add attributes 
for record in json_list:
    pmid = record["PMID"]
    root_name = record['root_name']
    key = (pmid,root_name)
    if key in ingredient_df_lookup:
        lookup_data = ingredient_df_lookup[key]
        record.setdefault("metadata", {})  # ensure "metadata" exists
        record["metadata"]["published_year"] = lookup_data["Publication Year"]
        record["metadata"]["neuraci_type"] = lookup_data["NeuraciType"]
        #record["metadata"]["synonyms"] = lookup_data["Synonyms"]
        record["metadata"]["url"] = lookup_data["PubMed URL"]

# Save JSON file
with open("cedarwood_modified_enriched.json", "w", encoding="utf-8") as f:
    json.dump(json_list, f, indent=2, ensure_ascii=False)

# Converting into Structured Data

In [24]:
import json
import pandas as pd

# # --- Step 1: Load JSON file ---
# with open("data.json", "r", encoding="utf-8") as f:
#     records = json.load(f)   # assuming it's a list of JSON objects

rows = []
records = json_list
# --- Step 2: Flatten each record ---
for rec in records:
    pmid = rec.get("PMID")
    root_name = rec.get("root_name")
    search_term = rec.get("search_term")
    meta = rec.get("metadata", {})
    pubmed_type = rec.get("pubmed_type")
    synonyms = rec.get('synonyms')

    row = {
        "root_name":root_name,
        "Search Term(s)":search_term,
        "PMID": pmid,
        "synonyms":synonyms,
        "published_year": meta.get("published_year"),
        "neuraci_type": meta.get("neuraci_type"),
        "pubmed_type":pubmed_type,
        "study_type": ";".join(meta.get('study_type')),
        "duration_days": meta.get("duration_days"),
        "sample_size": meta.get("sample_size"),
        "sample_gender": ";".join(meta.get("sample_gender", [])),
        "species": ";".join(meta.get("species", [])),
        "population": meta.get("population"),
        "purpose": meta.get("purpose"),
        "focus": "".join(meta.get("focus")),
        "benefits": ";".join(meta.get("benefits", [])),
        "synergies_interactions_positive": ";".join(meta.get("synergies_interactions_positive", [])),
        "synergies_interactions_negative": ";".join(meta.get("synergies_interactions_negative", [])),
        "safety_side_effects": ";".join(meta.get("safety_side_effects", [])),
        "conditions": ";".join(meta.get("conditions", [])),
        "biomarkers": ";".join(meta.get("biomarkers", [])),
        "symptoms": ";".join(meta.get("symptoms", [])),
        "keywords": ";".join(meta.get("keywords", [])),
        "diseases": ";".join(meta.get("diseases", [])),
        "mechanism": ";".join(meta.get("mechanism", [])),
        "usage": ";".join(meta.get("usage", [])),
        "conclusion":meta.get("conclusion"),
        "url": meta.get("url", []),
    }

    # --- Expand interventions ---
    for i, iv in enumerate(meta.get("interventions", []), start=1):
        row[f"intervention{i}_ingredient"] = iv.get("ingredient")
        row[f"intervention{i}_daily_dosage"] = iv.get("daily_dosage")
        row[f"intervention{i}_units"] = iv.get("units")
        row[f"intervention{i}_original_text"] = iv.get("original_text")

    # --- Expand outcomes ---
    for j, oc in enumerate(meta.get("outcomes", []), start=1):
        row[f"outcome{j}_name"] = oc.get("name")
        row[f"outcome{j}_domain"] = oc.get("domain")
        row[f"outcome{j}_type"] = oc.get("type")
        row[f"outcome{j}_result"] = oc.get("result")

    rows.append(row)

# --- Step 3: Convert to DataFrame ---
output_df = pd.DataFrame(rows)

# --- Step 4: Save to CSV ---
output_df.to_csv("Cedarwood_modified_output.csv", index=False)
output_df.head()

Unnamed: 0,root_name,Search Term(s),PMID,synonyms,published_year,neuraci_type,pubmed_type,study_type,duration_days,sample_size,...,outcome17_type,outcome17_result,outcome18_name,outcome18_domain,outcome18_type,outcome18_result,outcome19_name,outcome19_domain,outcome19_type,outcome19_result
0,Cedarwood,Cedrus,10473175,Cedarwood; Cedrus;,1999,Observational Studies & Reviews,Comparative Study,unspecified,,,...,,,,,,,,,,
1,Cedarwood,Cedrus,11524119,Cedarwood; Cedrus;,2001,Observational Studies & Reviews,Comparative Study,unspecified,,137.0,...,,,,,,,,,,
2,Cedarwood,Cedrus,11876259,Cedarwood; Cedrus;,2002,Observational Studies & Reviews,"Comparative Study, Review",review,,,...,,,,,,,,,,
3,Cedarwood,Cedrus,11876600,Cedarwood; Cedrus;,2002,Clinical Trials,"Controlled Clinical Trial, Clinical Trial",non-randomized trial,730.0,23.0,...,,,,,,,,,,
4,Cedarwood,Cedrus,11896972,Cedarwood; Cedrus;,2002,Observational Studies & Reviews,Case Reports,unspecified,,3.0,...,,,,,,,,,,


In [25]:
pd.set_option('display.max_columns', None)

In [26]:
output_df.head()

Unnamed: 0,root_name,Search Term(s),PMID,synonyms,published_year,neuraci_type,pubmed_type,study_type,duration_days,sample_size,sample_gender,species,population,purpose,focus,benefits,synergies_interactions_positive,synergies_interactions_negative,safety_side_effects,conditions,biomarkers,symptoms,keywords,diseases,mechanism,usage,conclusion,url,intervention1_ingredient,intervention1_daily_dosage,intervention1_units,intervention1_original_text,outcome1_name,outcome1_domain,outcome1_type,outcome1_result,outcome2_name,outcome2_domain,outcome2_type,outcome2_result,outcome3_name,outcome3_domain,outcome3_type,outcome3_result,outcome4_name,outcome4_domain,outcome4_type,outcome4_result,outcome5_name,outcome5_domain,outcome5_type,outcome5_result,outcome6_name,outcome6_domain,outcome6_type,outcome6_result,outcome7_name,outcome7_domain,outcome7_type,outcome7_result,intervention2_ingredient,intervention2_daily_dosage,intervention2_units,intervention2_original_text,outcome8_name,outcome8_domain,outcome8_type,outcome8_result,outcome9_name,outcome9_domain,outcome9_type,outcome9_result,intervention3_ingredient,intervention3_daily_dosage,intervention3_units,intervention3_original_text,intervention4_ingredient,intervention4_daily_dosage,intervention4_units,intervention4_original_text,intervention5_ingredient,intervention5_daily_dosage,intervention5_units,intervention5_original_text,outcome10_name,outcome10_domain,outcome10_type,outcome10_result,outcome11_name,outcome11_domain,outcome11_type,outcome11_result,outcome12_name,outcome12_domain,outcome12_type,outcome12_result,outcome13_name,outcome13_domain,outcome13_type,outcome13_result,outcome14_name,outcome14_domain,outcome14_type,outcome14_result,outcome15_name,outcome15_domain,outcome15_type,outcome15_result,outcome16_name,outcome16_domain,outcome16_type,outcome16_result,outcome17_name,outcome17_domain,outcome17_type,outcome17_result,outcome18_name,outcome18_domain,outcome18_type,outcome18_result,outcome19_name,outcome19_domain,outcome19_type,outcome19_result
0,Cedarwood,Cedrus,10473175,Cedarwood; Cedrus;,1999,Observational Studies & Reviews,Comparative Study,unspecified,,,unspecified,other,turkish plants used in folk medicine for gastr...,to study the anti-helicobacter pylori effect o...,secondary,anti-helicobacter pylori effect,,,,gastric ailments;peptic ulcers,,,anti-helicobacter pylori;turkish plants;folk m...,helicobacter pylori infection,,unspecified,the study found that extracts from six turkish...,https://pubmed.ncbi.nlm.nih.gov/10473175/,cedrus libani,,,cones of cedrus libani,anti-helicobacter pylori effect,condition,primary,improved,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,Cedarwood,Cedrus,11524119,Cedarwood; Cedrus;,2001,Observational Studies & Reviews,Comparative Study,unspecified,,137.0,unspecified,other,137 species of gymnosperms belonging to 14 fam...,to determine the fatty acid composition of pho...,secondary,,,,,,fatty acid composition;chloroplastic lipids;di...,,gymnosperms;fatty acid composition;cedrus;pina...,,,unspecified,"a clear-cut separation of the genera abies, la...",https://pubmed.ncbi.nlm.nih.gov/11524119/,,,,,fatty acid composition,biomarker,primary,not_reported,chloroplastic lipids,biomarker,secondary,not_reported,digalactosyldiacylglycerol,biomarker,secondary,not_reported,monogalactosyldiacylglycerol,biomarker,secondary,not_reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Cedarwood,Cedrus,11876259,Cedarwood; Cedrus;,2002,Observational Studies & Reviews,"Comparative Study, Review",review,,,,other,abietoid seed fatty acid compositions of the g...,to review the seed fatty acid compositions of ...,secondary,,,,,,delta5-olefinic acids;taxoleic acid;pinolenic ...,,abietoid;seed fatty acid;pinaceae;delta5-olefi...,,,,the study suggests that seed fatty acid compos...,https://pubmed.ncbi.nlm.nih.gov/11876259/,,,,,delta5-olefinic acids,biomarker,secondary,not_reported,taxoleic acid,biomarker,secondary,not_reported,pinolenic acid,biomarker,secondary,not_reported,coniferonic acid,biomarker,secondary,not_reported,keteleeronic acid,biomarker,secondary,not_reported,sciadonic acid,biomarker,secondary,not_reported,14-methyl hexadecanoic acid,biomarker,secondary,not_reported,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,Cedarwood,Cedrus,11876600,Cedarwood; Cedrus;,2002,Clinical Trials,"Controlled Clinical Trial, Clinical Trial",non-randomized trial,730.0,23.0,unspecified,humans,patients with japanese cedar pollinosis,to evaluate the effect of japanese cedar-speci...,none,,,,,japanese cedar pollinosis,il-4;il-5;il-13;ifn-gamma,,japanese cedar;immunotherapy;cytokine producti...,allergic rhinitis,,unspecified,specific immunotherapy for japanese cedar poll...,https://pubmed.ncbi.nlm.nih.gov/11876600/,cs-560,,,cs-560,japanese cedar pollinosis,condition,primary,improved,il-4,biomarker,secondary,improved,il-5,biomarker,secondary,improved,il-13,biomarker,secondary,improved,ifn-gamma,biomarker,secondary,no_effect,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,Cedarwood,Cedrus,11896972,Cedarwood; Cedrus;,2002,Observational Studies & Reviews,Case Reports,unspecified,,3.0,unspecified,animals,dogs with recurrent pruritic dermatitis,to investigate the sensitivity of dogs to japa...,none,,,,,atopic dermatitis,ige specific to cry j 1;ige specific to cry j 2,,atopic dermatitis;japanese cedar;cryptomeria j...,,type i hypersensitivity,,cry j 1 was identified as the major allergen r...,https://pubmed.ncbi.nlm.nih.gov/11896972/,,,,,atopic dermatitis,condition,primary,improved,ige specific to cry j 1,biomarker,primary,improved,ige specific to cry j 2,biomarker,secondary,no_effect,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
