In [12]:
import os 
import sys
import json
import datetime
import logging
import pandas as pd 

In [20]:
# Load the autoreload extension
%load_ext autoreload

# Set autoreload mode to reload all modules before executing each cell
%autoreload 2

# Get the absolute path to the parent directory
parent_path = os.path.abspath("/Users/vince/Salk/OmicsCodex/")

# Add the parent directory to sys.path
if parent_path not in sys.path:
    sys.path.append(parent_path)

from src.semantic_scholar import SemanticScholarAPI

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
from src.config import SEMANTIC_SCHOLAR_API_KEY

# CSV file name handed to `semantic_client.load_journal_sjr_data` further down.
# NOTE(review): this is a relative path — presumably resolved against the
# kernel's working directory; confirm against the loader.
JOURNALS_FILE = "journals_df.csv"

from rich.logging import RichHandler

def setup_logger(output_dir: str) -> logging.Logger:
    """Configure and return the ``PaperGenerationPipeline`` logger.

    Records at INFO level and above are sent to the console through a
    ``RichHandler`` and appended to ``<output_dir>/test.log``. ``output_dir``
    is created when it does not exist yet.

    ``logging.getLogger`` hands back the same logger object for a given name,
    so calling this function again (for example by re-running the notebook
    cell) used to stack another console/file handler pair on it and every
    record was emitted multiple times. Handlers already attached to the logger
    are therefore removed and closed before the new pair is added, which makes
    the function safe to call repeatedly.

    Args:
        output_dir: Directory that receives the ``test.log`` file.

    Returns:
        The configured logger, carrying exactly one console handler and one
        file handler.

    Raises:
        OSError: If the directory or the log file cannot be created.
    """
    os.makedirs(output_dir, exist_ok=True)
    log_file = os.path.join(output_dir, "test.log")

    logger = logging.getLogger("PaperGenerationPipeline")
    logger.setLevel(logging.INFO)

    # Drop handlers left over from a previous call; closing them also releases
    # the file descriptor held by the earlier FileHandler.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    log_format = "%(asctime)s [%(levelname)s] %(message)s"

    # Console handler.
    # NOTE(review): RichHandler appears to render its own time/level columns,
    # so this format may show them twice on the console — confirm before
    # changing, the format is kept as-is here.
    console_handler = RichHandler()
    console_handler.setLevel(logging.INFO)
    console_handler.setFormatter(logging.Formatter(log_format))
    logger.addHandler(console_handler)

    # File handler (opens the log file in append mode, the FileHandler default).
    file_handler = logging.FileHandler(log_file)
    file_handler.setLevel(logging.INFO)
    file_handler.setFormatter(logging.Formatter(log_format))
    logger.addHandler(file_handler)

    logger.info(f"Logging initialized. Log file: {log_file}")
    return logger

# Pipeline logger; setup_logger writes its file to logs/test.log.
logger = setup_logger("logs")

# Semantic Scholar client with both thresholds set to zero.
# NOTE(review): presumably zero disables SJR / citation filtering for this
# test run — confirm against SemanticScholarAPI.
semantic_client = SemanticScholarAPI(
    SEMANTIC_SCHOLAR_API_KEY,
    sjr_threshold=0,
    min_citation_count=0,
    logger=logger,
)

# Load the journal table the client needs before it can be queried.
semantic_client.load_journal_sjr_data(JOURNALS_FILE)

In [28]:
queries = [{"query": "trafficking protein particle complex 2B", 
            "section" : "test"}]

results = await semantic_client.query(index=0, queries=queries)

results

{'test': [Paper(section='temp', query='trafficking protein particle complex 2B', title='Trafficking protein particle complex 6A delta (TRAPPC6AΔ) is an extracellular plaque-forming protein in the brain', abstract='Tumor suppressor WWOX is involved in the progression of cancer and neurodegeneration. Here, we examined whether protein aggregation occurs in the brain of nondemented, middle-aged humans and whether this is associated with WWOX downregulation. We isolated an N-terminal internal deletion isoform, TPC6AΔ, derived from alternative splicing of the TRAPPC6A (TPC6A) gene transcript. TPC6AΔ proteins are present as aggregates or plaques in the extracellular matrix of the brain such as in the cortex. Filter retardation assays revealed that aggregate formation of TPC6AΔ occurs preceding Aβ generation in the hippocampi of middle-aged postmortem normal humans. In a Wwox gene knockout mouse model, we showed the plaques of pT181-Tau and TPC6AΔ in the cortex and hippocampus in 3-week-old mi