In [None]:
# Install Biopython (source of the Bio.Entrez module imported below).
# The explicit %pip magic does not depend on IPython's automagic setting and
# installs into the environment of the running kernel.
%pip install biopython

In [None]:
# Install matplotlib (used by the plotting cell).
# The explicit %pip magic does not depend on IPython's automagic setting and
# installs into the environment of the running kernel.
%pip install matplotlib

In [None]:
from Bio import Entrez
import pandas as pd
import time

# Contact address that Bio.Entrez attaches to the requests it sends to NCBI,
# so NCBI can reach the user about problematic usage.
Entrez.email = "victoriadeleray@gmail.com"

# Display label -> search clause. Each clause is spliced verbatim into the
# per-year query string, so multi-term alternatives carry their own parentheses.
methods = dict([
    ("circular dichroism", '"circular dichroism"'),
    (
        "x-ray crystallography OR crystalline sponges",
        '("x-ray crystallography" OR "crystalline sponges")',
    ),
    ("mass spectrometry", '"mass spectrometry"'),
    ("nuclear magnetic resonance", '("nuclear magnetic resonance" OR "NMR")'),
])

# Publication years to query: 2000 through 2025 inclusive.
years = [*range(2000, 2026)]

# Accumulator: one dict per (method, year) query gets appended here.
results = list()

# For every (method, year) pair, ask the "pmc" database how many records match
# and append one row to `results`. A failed lookup is logged and recorded with
# the sentinel "ERROR" instead of aborting the whole sweep.
for method_label, method_query in methods.items():
    for year in years:
        # Restrict the search to one calendar year and exclude review articles.
        query = (
            f'natural product AND {method_query} '
            f'AND ("{year}/01/01"[Publication Date] : "{year}/12/31"[Publication Date]) '
            f'NOT review[Publication Type]'
        )
        try:
            handle = Entrez.esearch(db="pmc", term=query, rettype="count")
            try:
                record = Entrez.read(handle)
            finally:
                # Always release the handle returned by esearch, even when
                # parsing the response fails.
                handle.close()
            count = int(record["Count"])
        except Exception as e:
            # Deliberate best effort: note the failure and keep sweeping.
            print(f"Error with {method_label} in {year}: {e}")
            count = "ERROR"
        else:
            print(f"{year} - {method_label}: {count}")

        results.append({
            "Year": year,
            "Method": method_label,
            "Query": query,
            "Result Count": count
        })

        # Pause after every attempt, failed or not, so an error is not
        # followed immediately by the next request.
        time.sleep(0.4)

# Gather every collected row into one table and write it out as CSV,
# omitting the DataFrame index column.
df = pd.DataFrame(data=results)
df.to_csv(
    r"/Volumes/SharedFolder/victoria/np_pubmed/test.csv",
    index=False,
)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the saved per-year, per-method counts.
df = pd.read_csv("/Volumes/SharedFolder/victoria/np_pubmed/test.csv")

# Reshape to one row per year and one column per method, then force every
# cell to a number; anything non-numeric becomes NaN.
df_pivot = (
    df.pivot(index="Year", columns="Method", values="Result Count")
    .apply(pd.to_numeric, errors='coerce')
)

# Running total down the years for each method.
df_cumulative = df_pivot.cumsum()

# Normalise the column labels (trim whitespace, lower-case) so they can be
# used as lookup keys for the styling dictionaries.
normalised_labels = df_cumulative.columns.str.strip().str.lower()
df_cumulative.columns = normalised_labels

# Per-method line styling, keyed by the lower-cased column labels of
# df_cumulative.
#
# X-ray crystallography and crystalline sponges are searched together and
# stored under the single label "x-ray crystallography OR crystalline sponges",
# which lower-cases to the combined key below. Without that key the .get()
# lookups missed: the series lost its intended red and fell back to the "o"
# marker, the same marker as mass spectrometry. The two single-technique keys
# are kept so data files that list the techniques separately still get styled.
color_map = {
    "mass spectrometry": "#e88835",
    "nuclear magnetic resonance": "#62a063",
    "x-ray crystallography or crystalline sponges": "#d52b29",
    "x-ray crystallography": "#d52b29",
    "crystalline sponges": "#d52b29",
    "circular dichroism": "#e35e4e"
}

marker_map = {
    "mass spectrometry": "o",
    "nuclear magnetic resonance": "s",
    "x-ray crystallography or crystalline sponges": "D",
    "x-ray crystallography": "D",
    "crystalline sponges": "^",
    "circular dichroism": "P"
}

# Draw one cumulative curve per method on a single 12x6 inch figure.
fig, ax = plt.subplots(figsize=(12, 6))

for method in df_cumulative.columns:
    # Methods without a styling entry get matplotlib's default color and the
    # "o" marker.
    ax.plot(
        df_cumulative.index,
        df_cumulative[method],
        marker=marker_map.get(method, "o"),
        label=method.title(),
        color=color_map.get(method, None),
    )

ax.set_title('Cumulative Full-Text PMC Publication Count by Method (Natural Products)')
ax.set_xlabel('Year')
ax.set_ylabel('Cumulative Article Count')
# Anchor the legend's upper-left corner at the axes' upper-right corner, which
# places it outside the plotting area.
ax.legend(title="Method", loc="upper left", bbox_to_anchor=(1, 1))
ax.grid(True)
fig.tight_layout()

# Write the PDF before showing the figure.
fig.savefig("/Users/victoriadeleray/Desktop/Opinions article/test.pdf", format="pdf")

plt.show()
