# 正解データの作成

In [1]:
!pip install biopython


Collecting biopython
  Downloading biopython-1.84-cp312-cp312-win_amd64.whl.metadata (13 kB)
Downloading biopython-1.84-cp312-cp312-win_amd64.whl (2.8 MB)
   ---------------------------------------- 0.0/2.8 MB ? eta -:--:--
   --------------- ------------------------ 1.0/2.8 MB 5.6 MB/s eta 0:00:01
   --------------------------------- ------ 2.4/2.8 MB 5.8 MB/s eta 0:00:01
   ---------------------------------------- 2.8/2.8 MB 5.8 MB/s eta 0:00:00
Installing collected packages: biopython
Successfully installed biopython-1.84



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from Bio import Entrez
import csv

# Entrez configuration
Entrez.email = "youkiti@example.com"  # be sure to enter your own e-mail address

# List of PMIDs; passed to fetch_pubmed_data() by the driver code at the
# bottom of this cell.
pmid_list = [
    "9971864", "30871608", "39382241", "38869931",
    "28906284", "21192282", "33591005", "15888852",
    "18442375", "27436190"
]

# Output file name; the CSV written by save_to_csv() goes here (relative path).
output_file = "pubmed_true_data.csv"

# Data-fetching function
def fetch_pubmed_data(pmids):
    """Fetch and parse one PubMed result per PMID.

    Each PMID is requested separately with ``Entrez.efetch`` and the response
    is parsed with ``Entrez.read``. A PMID whose request or parsing raises is
    reported on stdout and skipped, so the returned list can be shorter than
    ``pmids``.

    Args:
        pmids: Iterable of PMIDs; each one is passed as the ``id`` argument
            of ``Entrez.efetch``.

    Returns:
        list: The parsed results, in input order, for the PMIDs that
        succeeded.
    """
    records = []
    for pmid in pmids:
        try:
            handle = Entrez.efetch(db="pubmed", id=pmid, rettype="xml", retmode="text")
            try:
                records.append(Entrez.read(handle))
            finally:
                # Close the handle even when parsing fails; otherwise a parse
                # error would skip close() and leak the open response.
                handle.close()
        except Exception as e:
            # Best effort: report the failing PMID and continue with the rest.
            print(f"Error fetching PMID {pmid}: {e}")
    return records

# Title and abstract extraction
def extract_title_abstract(records):
    """Build ``[pmid, title, abstract]`` rows from parsed records.

    Only the first entry of each record's ``"PubmedArticle"`` list is read.
    All ``AbstractText`` sections are joined with a single space, so
    multi-section abstracts are kept in full instead of being cut off after
    their first section. A record that cannot be processed is reported on
    stdout and skipped.

    Args:
        records: Iterable of mapping-like parsed records, each holding a
            ``"PubmedArticle"`` list whose items contain
            ``["MedlineCitation"]["Article"]`` and ``["MedlineCitation"]["PMID"]``.

    Returns:
        list: One ``[pmid, title, abstract]`` list per successfully processed
        record. ``title`` falls back to ``"No Title"`` and ``abstract`` to
        ``"No Abstract"`` when the corresponding field is absent.
    """
    data = []
    for record in records:
        try:
            citation = record["PubmedArticle"][0]["MedlineCitation"]
            article = citation["Article"]
            pmid = citation["PMID"]
            title = article.get("ArticleTitle", "No Title")
            sections = article.get("Abstract", {}).get("AbstractText", [])
            if isinstance(sections, str):
                # A bare string would otherwise be joined character by character.
                sections = [sections]
            # Keep every section; taking only the first one truncates
            # multi-section abstracts.
            if sections:
                abstract = " ".join(str(section) for section in sections)
            else:
                abstract = "No Abstract"
            data.append([pmid, title, abstract])
        except Exception as e:
            # Best effort: report the problem and continue with the next record.
            print(f"Error processing record: {e}")
    return data

# CSV output function
def save_to_csv(data, filename):
    """Write ``data`` to ``filename`` as UTF-8 CSV under a fixed header row.

    The file is opened in write mode, so an existing file is overwritten.

    Args:
        data: Iterable of rows; each row is written as one CSV record after
            the ``PMID, Title, Abstract`` header.
        filename: Path of the CSV file to create.
    """
    header = ["PMID", "Title", "Abstract"]
    # newline="" lets the csv module control line endings itself.
    with open(filename, "w", encoding="utf-8", newline="") as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        for row in data:
            out.writerow(row)

# Driver: fetch every PMID in pmid_list, reduce the results to
# [pmid, title, abstract] rows, and write them to output_file.
# `records` and `data` are bound at module level, so they remain available
# after this block has run.
if __name__ == "__main__":
    records = fetch_pubmed_data(pmid_list)
    data = extract_title_abstract(records)
    save_to_csv(data, output_file)
    # Printed unconditionally, even if some PMIDs were skipped by the helpers.
    print(f"Data saved to {output_file}")


Data saved to pubmed_true_data.csv
